itests: Fix flaky TestWindowPostWorkerSkipBadSector

This commit is contained in:
Łukasz Magiera 2022-03-18 19:53:59 +01:00
parent 7b81369c8c
commit 36f1dd7bb3
4 changed files with 19 additions and 17 deletions

View File

@ -5,7 +5,7 @@ import (
"sort" "sort"
"sync" "sync"
"github.com/hashicorp/go-multierror" "go.uber.org/multierr"
"golang.org/x/xerrors" "golang.org/x/xerrors"
ffi "github.com/filecoin-project/filecoin-ffi" ffi "github.com/filecoin-project/filecoin-ffi"
@ -187,8 +187,7 @@ func (m *Manager) generateWindowPoSt(ctx context.Context, minerID abi.ActorID, s
skipped = append(skipped, sk...) skipped = append(skipped, sk...)
if err != nil { if err != nil {
retErr = multierror.Append(retErr, xerrors.Errorf("partitionCount:%d err:%+v", partIdx, err)) retErr = multierr.Append(retErr, xerrors.Errorf("partitionCount:%d err:%+v", partIdx, err))
return
} }
flk.Unlock() flk.Unlock()
} }
@ -199,13 +198,13 @@ func (m *Manager) generateWindowPoSt(ctx context.Context, minerID abi.ActorID, s
wg.Wait() wg.Wait()
postProofs, err := ffi.MergeWindowPoStPartitionProofs(ppt, proofList) if len(skipped) > 0 {
if err != nil { return nil, skipped, multierr.Append(xerrors.Errorf("some sectors (%d) were skipped", len(skipped)), retErr)
return nil, nil, xerrors.Errorf("merge windowPoSt partition proofs: %v", err)
} }
if len(skipped) > 0 { postProofs, err := ffi.MergeWindowPoStPartitionProofs(ppt, proofList)
log.Warnf("GenerateWindowPoSt get skipped: %d", len(skipped)) if err != nil {
return nil, skipped, xerrors.Errorf("merge windowPoSt partition proofs: %v", err)
} }
out = append(out, *postProofs) out = append(out, *postProofs)

View File

@ -627,7 +627,7 @@ func (l *LocalWorker) GenerateWindowPoSt(ctx context.Context, ppt abi.Registered
Miner: mid, Miner: mid,
Number: s.SectorNumber, Number: s.SectorNumber,
}) })
log.Errorf("get sector: %d, vanilla: %s, err: %s", s.SectorNumber, vanilla, err) log.Errorf("reading PoSt challenge for sector %d, vlen:%d, err: %s", s.SectorNumber, len(vanilla), err)
return return
} }
@ -639,7 +639,10 @@ func (l *LocalWorker) GenerateWindowPoSt(ctx context.Context, ppt abi.Registered
if len(skipped) > 0 { if len(skipped) > 0 {
// This should happen rarely because before entering GenerateWindowPoSt we check all sectors by reading challenges. // This should happen rarely because before entering GenerateWindowPoSt we check all sectors by reading challenges.
// When it does happen, window post runner logic will just re-check sectors, and retry with newly-discovered-bad sectors skipped // When it does happen, window post runner logic will just re-check sectors, and retry with newly-discovered-bad sectors skipped
return storiface.WindowPoStResult{Skipped: skipped}, xerrors.Errorf("couldn't read some challenges (skipped %d)", len(skipped)) log.Errorf("couldn't read some challenges (skipped %d)", len(skipped))
// note: can't return an error as this is a jsonrpc call
return storiface.WindowPoStResult{Skipped: skipped}, nil
} }
res, err := sb.GenerateWindowPoStWithVanilla(ctx, ppt, mid, randomness, vproofs, partitionIdx) res, err := sb.GenerateWindowPoStWithVanilla(ctx, ppt, mid, randomness, vproofs, partitionIdx)

View File

@ -191,7 +191,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
sectors := 2 * 48 * 2 sectors := 2 * 48 * 2
badsector := new(uint64) var badsector uint64 = 100000
client, miner, _, ens := kit.EnsembleWorker(t, client, miner, _, ens := kit.EnsembleWorker(t,
kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines
@ -201,14 +201,14 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
kit.WithWorkerStorage(func(store stores.Store) stores.Store { kit.WithWorkerStorage(func(store stores.Store) stores.Store {
return &badWorkerStorage{ return &badWorkerStorage{
Store: store, Store: store,
badsector: badsector, badsector: &badsector,
} }
}), }),
kit.ConstructorOpts(node.ApplyIf(node.IsType(repo.StorageMiner), kit.ConstructorOpts(node.ApplyIf(node.IsType(repo.StorageMiner),
node.Override(new(stores.Store), func(store *stores.Remote) stores.Store { node.Override(new(stores.Store), func(store *stores.Remote) stores.Store {
return &badWorkerStorage{ return &badWorkerStorage{
Store: store, Store: store,
badsector: badsector, badsector: &badsector,
notBadCount: 1, notBadCount: 1,
} }
})))) }))))
@ -219,7 +219,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
require.NoError(t, err) require.NoError(t, err)
bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0] bm := ens.InterconnectAll().BeginMiningMustPost(2 * time.Millisecond)[0]
di = di.NextNotElapsed() di = di.NextNotElapsed()
@ -245,7 +245,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
t.Log("post message landed") t.Log("post message landed")
bm.MineBlocks(ctx, 2*time.Millisecond) bm.MineBlocksMustPost(ctx, 2*time.Millisecond)
waitUntil = di.Open + di.WPoStChallengeWindow*3 waitUntil = di.Open + di.WPoStChallengeWindow*3
t.Logf("End for head.Height > %d", waitUntil) t.Logf("End for head.Height > %d", waitUntil)
@ -282,7 +282,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
t.Logf("Drop sector %d; dl %d part %d", sid, di.Index+1, 0) t.Logf("Drop sector %d; dl %d part %d", sid, di.Index+1, 0)
atomic.StoreUint64(badsector, sid) atomic.StoreUint64(&badsector, sid)
require.NoError(t, err) require.NoError(t, err)
} }

View File

@ -652,7 +652,7 @@ func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, di dline.Info, t
postOut, ps, err := s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), xsinfos, append(abi.PoStRandomness{}, rand...)) postOut, ps, err := s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), xsinfos, append(abi.PoStRandomness{}, rand...))
elapsed := time.Since(tsStart) elapsed := time.Since(tsStart)
log.Infow("computing window post", "batch", batchIdx, "elapsed", elapsed) log.Infow("computing window post", "batch", batchIdx, "elapsed", elapsed, "skip", len(ps), "err", err)
if err != nil { if err != nil {
log.Errorf("error generating window post: %s", err) log.Errorf("error generating window post: %s", err)
} }