Merge pull request #9613 from filecoin-project/feat/wdpost-robustness

feat: storage: Force exit GenerateSingleVanillaProof on cancelled context
This commit is contained in:
Łukasz Magiera 2022-11-15 10:30:25 +01:00 committed by GitHub
commit 0e07fa94d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 5 deletions

35
lib/result/result.go Normal file
View File

@ -0,0 +1,35 @@
package result
// Result is a small wrapper type encapsulating Value/Error tuples, mostly for
// use when sending values across channels.
//
// NOTE: Avoid adding any functionality to this, any "nice" things added here will
// make it more difficult to switch to a more standardised Result-like type when
// one gets into the stdlib, or when we will want to switch to a library providing
// those types.
type Result[T any] struct {
	// Value is the carried value. It is the zero value of T when the Result
	// was built with Err.
	Value T
	// Error is the carried error. It is nil when the Result was built with Ok.
	Error error
}

// Ok returns a Result holding value and a nil Error.
func Ok[T any](value T) Result[T] {
	return Result[T]{
		Value: value,
	}
}

// Err returns a Result holding err and the zero value of T.
func Err[T any](err error) Result[T] {
	return Result[T]{
		Error: err,
	}
}

// Wrap packs a (value, error) pair into a Result without inspecting either
// of them. Taking both as parameters lets the results of a two-valued call be
// forwarded directly, as in Wrap(f()).
func Wrap[T any](value T, err error) Result[T] {
	return Result[T]{
		Value: value,
		Error: err,
	}
}

// Unwrap returns the stored Value and Error as a (T, error) pair, exactly as
// they were stored; no validation is performed.
//
// The receiver is a value (not a pointer) so that Unwrap can be called on
// non-addressable Results such as function return values, map elements or a
// direct channel receive, e.g. Wrap(f()).Unwrap(). Calls through a *Result[T]
// keep working because the pointer's method set includes value-receiver
// methods.
func (r Result[T]) Unwrap() (T, error) {
	return r.Value, r.Error
}

View File

@ -17,6 +17,7 @@ import (
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/proof"
"github.com/filecoin-project/lotus/lib/result"
"github.com/filecoin-project/lotus/storage/sealer/fsutil"
"github.com/filecoin-project/lotus/storage/sealer/storiface"
)
@ -758,20 +759,22 @@ func (st *Local) GenerateSingleVanillaProof(ctx context.Context, minerID abi.Act
ProofType: si.SealProof,
}
var cache string
var sealed string
var cache, sealed, cacheID, sealedID string
if si.Update {
src, _, err := st.AcquireSector(ctx, sr, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
src, si, err := st.AcquireSector(ctx, sr, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
return nil, xerrors.Errorf("acquire sector: %w", err)
}
cache, sealed = src.UpdateCache, src.Update
cacheID, sealedID = si.UpdateCache, si.Update
} else {
src, _, err := st.AcquireSector(ctx, sr, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
src, si, err := st.AcquireSector(ctx, sr, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
return nil, xerrors.Errorf("acquire sector: %w", err)
}
cache, sealed = src.Cache, src.Sealed
cacheID, sealedID = si.Cache, si.Sealed
}
if sealed == "" || cache == "" {
@ -789,7 +792,22 @@ func (st *Local) GenerateSingleVanillaProof(ctx context.Context, minerID abi.Act
SealedSectorPath: sealed,
}
return ffi.GenerateSingleVanillaProof(psi, si.Challenge)
start := time.Now()
resCh := make(chan result.Result[[]byte], 1)
go func() {
resCh <- result.Wrap(ffi.GenerateSingleVanillaProof(psi, si.Challenge))
}()
select {
case r := <-resCh:
return r.Unwrap()
case <-ctx.Done():
log.Errorw("failed to generate valilla PoSt proof before context cancellation", "err", ctx.Err(), "duration", time.Now().Sub(start), "cache-id", cacheID, "sealed-id", sealedID, "cache", cache, "sealed", sealed)
// this will leave the GenerateSingleVanillaProof goroutine hanging, but that's still less bad than failing PoSt
return nil, xerrors.Errorf("failed to generate vanilla proof before context cancellation: %w", ctx.Err())
}
}
var _ Store = &Local{}

View File

@ -7,6 +7,7 @@ import (
"github.com/ipfs/go-cid"
"go.opencensus.io/trace"
"go.uber.org/zap"
"golang.org/x/xerrors"
"github.com/filecoin-project/go-address"
@ -253,6 +254,14 @@ func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, manual bool, di
ctx, span := trace.StartSpan(ctx, "storage.runPoStCycle")
defer span.End()
start := time.Now()
log := log.WithOptions(zap.Fields(zap.Time("cycle", start)))
log.Infow("starting PoSt cycle", "manual", manual, "ts", ts, "deadline", di.Index)
defer func() {
log.Infow("post cycle done", "took", time.Now().Sub(start))
}()
if !manual {
// TODO: extract from runPoStCycle, run on fault cutoff boundaries
s.asyncFaultRecover(di, ts)