itests: Fix flaky TestWindowPostWorkerSkipBadSector

This commit is contained in:
Łukasz Magiera 2022-03-18 19:53:59 +01:00
parent 7b81369c8c
commit 36f1dd7bb3
4 changed files with 19 additions and 17 deletions

View File

@ -5,7 +5,7 @@ import (
"sort"
"sync"
"github.com/hashicorp/go-multierror"
"go.uber.org/multierr"
"golang.org/x/xerrors"
ffi "github.com/filecoin-project/filecoin-ffi"
@ -187,8 +187,7 @@ func (m *Manager) generateWindowPoSt(ctx context.Context, minerID abi.ActorID, s
skipped = append(skipped, sk...)
if err != nil {
retErr = multierror.Append(retErr, xerrors.Errorf("partitionCount:%d err:%+v", partIdx, err))
return
retErr = multierr.Append(retErr, xerrors.Errorf("partitionCount:%d err:%+v", partIdx, err))
}
flk.Unlock()
}
@ -199,13 +198,13 @@ func (m *Manager) generateWindowPoSt(ctx context.Context, minerID abi.ActorID, s
wg.Wait()
postProofs, err := ffi.MergeWindowPoStPartitionProofs(ppt, proofList)
if err != nil {
return nil, nil, xerrors.Errorf("merge windowPoSt partition proofs: %v", err)
if len(skipped) > 0 {
return nil, skipped, multierr.Append(xerrors.Errorf("some sectors (%d) were skipped", len(skipped)), retErr)
}
if len(skipped) > 0 {
log.Warnf("GenerateWindowPoSt get skipped: %d", len(skipped))
postProofs, err := ffi.MergeWindowPoStPartitionProofs(ppt, proofList)
if err != nil {
return nil, skipped, xerrors.Errorf("merge windowPoSt partition proofs: %v", err)
}
out = append(out, *postProofs)

View File

@ -627,7 +627,7 @@ func (l *LocalWorker) GenerateWindowPoSt(ctx context.Context, ppt abi.Registered
Miner: mid,
Number: s.SectorNumber,
})
log.Errorf("get sector: %d, vanilla: %s, err: %s", s.SectorNumber, vanilla, err)
log.Errorf("reading PoSt challenge for sector %d, vlen:%d, err: %s", s.SectorNumber, len(vanilla), err)
return
}
@ -639,7 +639,10 @@ func (l *LocalWorker) GenerateWindowPoSt(ctx context.Context, ppt abi.Registered
if len(skipped) > 0 {
// This should happen rarely because before entering GenerateWindowPoSt we check all sectors by reading challenges.
// When it does happen, window post runner logic will just re-check sectors, and retry with newly-discovered-bad sectors skipped
return storiface.WindowPoStResult{Skipped: skipped}, xerrors.Errorf("couldn't read some challenges (skipped %d)", len(skipped))
log.Errorf("couldn't read some challenges (skipped %d)", len(skipped))
// note: can't return an error as this is a jsonrpc call
return storiface.WindowPoStResult{Skipped: skipped}, nil
}
res, err := sb.GenerateWindowPoStWithVanilla(ctx, ppt, mid, randomness, vproofs, partitionIdx)

View File

@ -191,7 +191,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
sectors := 2 * 48 * 2
badsector := new(uint64)
var badsector uint64 = 100000
client, miner, _, ens := kit.EnsembleWorker(t,
kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines
@ -201,14 +201,14 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
kit.WithWorkerStorage(func(store stores.Store) stores.Store {
return &badWorkerStorage{
Store: store,
badsector: badsector,
badsector: &badsector,
}
}),
kit.ConstructorOpts(node.ApplyIf(node.IsType(repo.StorageMiner),
node.Override(new(stores.Store), func(store *stores.Remote) stores.Store {
return &badWorkerStorage{
Store: store,
badsector: badsector,
badsector: &badsector,
notBadCount: 1,
}
}))))
@ -219,7 +219,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
require.NoError(t, err)
bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0]
bm := ens.InterconnectAll().BeginMiningMustPost(2 * time.Millisecond)[0]
di = di.NextNotElapsed()
@ -245,7 +245,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
t.Log("post message landed")
bm.MineBlocks(ctx, 2*time.Millisecond)
bm.MineBlocksMustPost(ctx, 2*time.Millisecond)
waitUntil = di.Open + di.WPoStChallengeWindow*3
t.Logf("End for head.Height > %d", waitUntil)
@ -282,7 +282,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
t.Logf("Drop sector %d; dl %d part %d", sid, di.Index+1, 0)
atomic.StoreUint64(badsector, sid)
atomic.StoreUint64(&badsector, sid)
require.NoError(t, err)
}

View File

@ -652,7 +652,7 @@ func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, di dline.Info, t
postOut, ps, err := s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), xsinfos, append(abi.PoStRandomness{}, rand...))
elapsed := time.Since(tsStart)
log.Infow("computing window post", "batch", batchIdx, "elapsed", elapsed)
log.Infow("computing window post", "batch", batchIdx, "elapsed", elapsed, "skip", len(ps), "err", err)
if err != nil {
log.Errorf("error generating window post: %s", err)
}