From 7b81369c8c8e4739ee3ce04315dd1a5430303bb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Magiera?=
Date: Fri, 18 Mar 2022 19:07:39 +0100
Subject: [PATCH] wdpost: Don't attempt to snark with skipped sectors

---
 .../sector-storage/ffiwrapper/sealer_test.go | 13 +++++++
 extern/sector-storage/manager.go             |  4 +-
 extern/sector-storage/worker_local.go        | 11 ++----
 itests/worker_test.go                        | 38 +++++++++++++------
 node/builder_miner.go                        |  1 +
 node/modules/storageminer.go                 |  2 +-
 storage/wdpost_run.go                        |  7 ----
 7 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/extern/sector-storage/ffiwrapper/sealer_test.go b/extern/sector-storage/ffiwrapper/sealer_test.go
index 5a2a42af9..e8848e735 100644
--- a/extern/sector-storage/ffiwrapper/sealer_test.go
+++ b/extern/sector-storage/ffiwrapper/sealer_test.go
@@ -948,6 +948,7 @@ func TestMulticoreSDR(t *testing.T) {
 func TestPoStChallengeAssumptions(t *testing.T) {
 	var r [32]byte
 	rand.Read(r[:])
+	r[31] &= 0x3f
 
 	// behaves like a pure function
 	{
@@ -977,6 +978,18 @@ func TestPoStChallengeAssumptions(t *testing.T) {
 		require.NotEqual(t, c1.Challenges[4], c2.Challenges[4])
 	}
 
+	// length doesn't matter
+	{
+		c1, err := ffi.GeneratePoStFallbackSectorChallenges(abi.RegisteredPoStProof_StackedDrgWindow32GiBV1, 1000, r[:], []abi.SectorNumber{1})
+		require.NoError(t, err)
+
+		c2, err := ffi.GeneratePoStFallbackSectorChallenges(abi.RegisteredPoStProof_StackedDrgWindow32GiBV1, 1000, r[:], []abi.SectorNumber{1, 2})
+		require.NoError(t, err)
+
+		require.NotEqual(t, c1, c2)
+		require.Equal(t, c1.Challenges[1], c2.Challenges[1])
+	}
+
 	// generate dedupes
 	{
 		c1, err := ffi.GeneratePoStFallbackSectorChallenges(abi.RegisteredPoStProof_StackedDrgWindow32GiBV1, 1000, r[:], []abi.SectorNumber{1, 2, 1, 4})
diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go
index 6cc9bd2e5..0e0387f57 100644
--- a/extern/sector-storage/manager.go
+++ b/extern/sector-storage/manager.go
@@ -56,7 +56,7 @@ var ClosedWorkerID = uuid.UUID{}
 
 type Manager struct {
 	ls         stores.LocalStorage
-	storage    *stores.Remote
+	storage    stores.Store
 	localStore *stores.Local
 	remoteHnd  *stores.FetchHandler
 	index      stores.SectorIndex
@@ -123,7 +123,7 @@ type StorageAuth http.Header
 type WorkerStateStore *statestore.StateStore
 type ManagerStateStore *statestore.StateStore
 
-func New(ctx context.Context, lstor *stores.Local, stor *stores.Remote, ls stores.LocalStorage, si stores.SectorIndex, sc SealerConfig, wss WorkerStateStore, mss ManagerStateStore) (*Manager, error) {
+func New(ctx context.Context, lstor *stores.Local, stor stores.Store, ls stores.LocalStorage, si stores.SectorIndex, sc SealerConfig, wss WorkerStateStore, mss ManagerStateStore) (*Manager, error) {
 	prover, err := ffiwrapper.New(&readonlyProvider{stor: lstor, index: si})
 	if err != nil {
 		return nil, xerrors.Errorf("creating prover instance: %w", err)
diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go
index c272bce47..20e0dc29d 100644
--- a/extern/sector-storage/worker_local.go
+++ b/extern/sector-storage/worker_local.go
@@ -636,14 +636,11 @@ func (l *LocalWorker) GenerateWindowPoSt(ctx context.Context, ppt abi.Registered
 	}
 
 	wg.Wait()
 
-	var at = 0
-	for i := range vproofs {
-		if vproofs[i] != nil {
-			vproofs[at] = vproofs[i]
-			at++
-		}
+	if len(skipped) > 0 {
+		// This should happen rarely because, before entering GenerateWindowPoSt, we check all sectors by reading challenges.
+		// When it does happen, the window PoSt runner logic will simply re-check sectors and retry with the newly-discovered bad sectors skipped.
+		return storiface.WindowPoStResult{Skipped: skipped}, xerrors.Errorf("couldn't read some challenges (skipped %d)", len(skipped))
 	}
-	vproofs = vproofs[:at]
 
 	res, err := sb.GenerateWindowPoStWithVanilla(ctx, ppt, mid, randomness, vproofs, partitionIdx)
diff --git a/itests/worker_test.go b/itests/worker_test.go
index f1b1e1310..1ec2f836d 100644
--- a/itests/worker_test.go
+++ b/itests/worker_test.go
@@ -2,13 +2,12 @@ package itests
 
 import (
 	"context"
-	"github.com/filecoin-project/lotus/extern/sector-storage/stores"
-	"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
-	"golang.org/x/xerrors"
 	"sync/atomic"
 	"testing"
 	"time"
 
+	"golang.org/x/xerrors"
+
 	logging "github.com/ipfs/go-log/v2"
 	"github.com/stretchr/testify/require"
 
@@ -17,8 +16,12 @@ import (
 	"github.com/filecoin-project/lotus/build"
 	"github.com/filecoin-project/lotus/chain/types"
 	"github.com/filecoin-project/lotus/extern/sector-storage/sealtasks"
+	"github.com/filecoin-project/lotus/extern/sector-storage/stores"
+	"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
 	"github.com/filecoin-project/lotus/itests/kit"
+	"github.com/filecoin-project/lotus/node"
 	"github.com/filecoin-project/lotus/node/impl"
+	"github.com/filecoin-project/lotus/node/repo"
 	"github.com/filecoin-project/lotus/storage"
 	storage2 "github.com/filecoin-project/specs-storage/storage"
 )
@@ -166,12 +169,16 @@ func TestWindowPostWorker(t *testing.T) {
 type badWorkerStorage struct {
 	stores.Store
 
-	badsector uint64
+	badsector   *uint64
+	notBadCount int
 }
 
 func (bs *badWorkerStorage) GenerateSingleVanillaProof(ctx context.Context, minerID abi.ActorID, si storiface.PostSectorChallenge, ppt abi.RegisteredPoStProof) ([]byte, error) {
-	if atomic.LoadUint64(&bs.badsector) == uint64(si.SectorNumber) {
-		return nil, xerrors.New("no proof for you")
+	if atomic.LoadUint64(bs.badsector) == uint64(si.SectorNumber) {
+		bs.notBadCount--
+		if bs.notBadCount < 0 {
+			return nil, xerrors.New("no proof for you")
+		}
 	}
 	return bs.Store.GenerateSingleVanillaProof(ctx, minerID, si, ppt)
 }
@@ -184,7 +191,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
 
 	sectors := 2 * 48 * 2
 
-	var badStore *badWorkerStorage
+	badsector := new(uint64)
 
 	client, miner, _, ens := kit.EnsembleWorker(t,
 		kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines
@@ -192,12 +199,19 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
 		kit.ThroughRPC(),
 		kit.WithTaskTypes([]sealtasks.TaskType{sealtasks.TTGenerateWindowPoSt}),
 		kit.WithWorkerStorage(func(store stores.Store) stores.Store {
-			badStore = &badWorkerStorage{
+			return &badWorkerStorage{
 				Store:     store,
-				badsector: 10000,
+				badsector: badsector,
 			}
-			return badStore
-		}))
+		}),
+		kit.ConstructorOpts(node.ApplyIf(node.IsType(repo.StorageMiner),
+			node.Override(new(stores.Store), func(store *stores.Remote) stores.Store {
+				return &badWorkerStorage{
+					Store:       store,
+					badsector:   badsector,
+					notBadCount: 1,
+				}
+			}))))
 
 	maddr, err := miner.ActorAddress(ctx)
 	require.NoError(t, err)
@@ -268,7 +282,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) {
 
 	t.Logf("Drop sector %d; dl %d part %d", sid, di.Index+1, 0)
 
-	atomic.StoreUint64(&badStore.badsector, sid)
+	atomic.StoreUint64(badsector, sid)
 
 	require.NoError(t, err)
 }
diff --git a/node/builder_miner.go b/node/builder_miner.go
index d24beff80..658cb0b65 100644
--- a/node/builder_miner.go
+++ b/node/builder_miner.go
@@ -83,6 +83,7 @@ func ConfigStorageMiner(c interface{}) Option {
 		Override(new(stores.LocalStorage), From(new(repo.LockedRepo))),
 		Override(new(*stores.Local), modules.LocalStorage),
 		Override(new(*stores.Remote), modules.RemoteStorage),
+		Override(new(stores.Store), From(new(*stores.Remote))),
 		Override(new(dtypes.RetrievalPricingFunc), modules.RetrievalPricingFunc(cfg.Dealmaking)),
 
 		If(!cfg.Subsystems.EnableMining,
diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go
index 05d41a427..9462357eb 100644
--- a/node/modules/storageminer.go
+++ b/node/modules/storageminer.go
@@ -721,7 +721,7 @@ func RemoteStorage(lstor *stores.Local, si stores.SectorIndex, sa sectorstorage.
 	return stores.NewRemote(lstor, si, http.Header(sa), sc.ParallelFetchLimit, &stores.DefaultPartialFileHandler{})
 }
 
-func SectorStorage(mctx helpers.MetricsCtx, lc fx.Lifecycle, lstor *stores.Local, stor *stores.Remote, ls stores.LocalStorage, si stores.SectorIndex, sc sectorstorage.SealerConfig, ds dtypes.MetadataDS) (*sectorstorage.Manager, error) {
+func SectorStorage(mctx helpers.MetricsCtx, lc fx.Lifecycle, lstor *stores.Local, stor stores.Store, ls stores.LocalStorage, si stores.SectorIndex, sc sectorstorage.SealerConfig, ds dtypes.MetadataDS) (*sectorstorage.Manager, error) {
 	ctx := helpers.LifecycleCtx(mctx, lc)
 
 	wsts := statestore.New(namespace.Wrap(ds, WorkerCallsPrefix))
diff --git a/storage/wdpost_run.go b/storage/wdpost_run.go
index a83945f17..a9e685b58 100644
--- a/storage/wdpost_run.go
+++ b/storage/wdpost_run.go
@@ -45,13 +45,6 @@ func (s *WindowPoStScheduler) recordPoStFailure(err error, ts *types.TipSet, dea
 			State:     SchedulerStateFaulted,
 		}
 	})
-
-	log.Errorf("Got err %+v - TODO handle errors", err)
-	/*s.failLk.Lock()
-	if eps > s.failed {
-		s.failed = eps
-	}
-	s.failLk.Unlock()*/
 }
 
 // recordProofsEvent records a successful proofs_processed event in the
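
Review note (appended; not part of the commit): the new "length doesn't matter"
case in TestPoStChallengeAssumptions pins down the assumption the rest of the
patch relies on: a sector's fallback challenges are derived only from the proof
type, the miner ID, the randomness, and that sector's number, never from which
other sectors share the request. A minimal sketch of why that matters for
skipping, reusing the setup of sealer_test.go (the proof type, miner ID 1000,
and masked randomness r are the test's own values; the sector numbers here are
arbitrary):

	// Dropping a bad sector from the request leaves the challenges of the
	// surviving sectors unchanged, so retrying with a reduced sector set
	// does not invalidate vanilla proofs already generated for the rest.
	cAll, err := ffi.GeneratePoStFallbackSectorChallenges(
		abi.RegisteredPoStProof_StackedDrgWindow32GiBV1, 1000, r[:], []abi.SectorNumber{1, 2, 3})
	require.NoError(t, err)

	cNo2, err := ffi.GeneratePoStFallbackSectorChallenges(
		abi.RegisteredPoStProof_StackedDrgWindow32GiBV1, 1000, r[:], []abi.SectorNumber{1, 3})
	require.NoError(t, err)

	require.Equal(t, cAll.Challenges[1], cNo2.Challenges[1])
	require.Equal(t, cAll.Challenges[3], cNo2.Challenges[3])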
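
Review note (appended; not part of the commit): the worker_local.go hunk is the
behavioral core. Previously, a sector whose challenge could not be read left a
nil entry in vproofs; the removed loop compacted those out, and the snark was
then attempted over an incomplete vanilla-proof set, which can only yield a
proof that fails to verify against the full partition. The patch returns early
instead, reporting the unreadable sectors in WindowPoStResult.Skipped. A hedged
sketch of the caller-side contract (the parameter list is abbreviated in the
hunk header, so treat the exact call shape as an assumption; the retry itself
is the window PoSt runner's existing re-check logic, not a new helper):

	// res.Skipped is meaningful even though err is non-nil: it names the
	// sectors whose challenges the worker could not read.
	res, err := w.GenerateWindowPoSt(ctx, ppt, mid, sectors, partitionIdx, randomness)
	if err != nil {
		if len(res.Skipped) > 0 {
			// Don't try to salvage this attempt. Record res.Skipped as
			// faulty and re-run the partition with those sectors excluded;
			// per the note above, the surviving sectors' challenges (and
			// vanilla proofs) remain valid on the retry.
		}
		return err
	}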
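
Review note (appended; not part of the commit): the manager.go,
builder_miner.go, and storageminer.go hunks are one refactor. Manager and
SectorStorage now depend on the stores.Store interface instead of the concrete
*stores.Remote, and production behavior is preserved by binding the interface
to the existing remote store:

	Override(new(stores.Store), From(new(*stores.Remote))),

That binding is exactly what the itest overrides. The enabling pattern, as used
by badWorkerStorage in itests/worker_test.go: embed the real stores.Store so
every method is forwarded, and override only the call used to read vanilla
proofs:

	type badWorkerStorage struct {
		stores.Store // all other methods delegate to the wrapped store

		badsector   *uint64 // which sector to fail; set atomically at runtime
		notBadCount int     // how many reads of the bad sector still succeed
	}

	func (bs *badWorkerStorage) GenerateSingleVanillaProof(ctx context.Context, minerID abi.ActorID, si storiface.PostSectorChallenge, ppt abi.RegisteredPoStProof) ([]byte, error) {
		if atomic.LoadUint64(bs.badsector) == uint64(si.SectorNumber) {
			bs.notBadCount--
			if bs.notBadCount < 0 {
				return nil, xerrors.New("no proof for you")
			}
		}
		return bs.Store.GenerateSingleVanillaProof(ctx, minerID, si, ppt)
	}

As I read the test, the two wrapper instances differ deliberately: the
worker-side store fails the bad sector immediately, while the miner-side store
(notBadCount: 1) lets the runner's pre-GenerateWindowPoSt challenge check pass
once, so the failure first surfaces inside the worker and exercises the new
early-return path.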