Merge pull request #8177 from filecoin-project/fix/fault-tracker-snap-deals

fix:proving:post check sector handles snap deals replica faults
This commit is contained in:
ZenGround0 2022-02-24 11:30:10 -07:00 committed by GitHub
commit d711775cdf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 69 additions and 58 deletions

View File

@ -256,7 +256,7 @@ type StorageMiner interface {
// the path specified when calling CreateBackup is within the base path
CreateBackup(ctx context.Context, fpath string) error //perm:admin
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, expensive bool) (map[abi.SectorNumber]string, error) //perm:admin
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, expensive bool) (map[abi.SectorNumber]string, error) //perm:admin
ComputeProof(ctx context.Context, ssi []builtin.ExtendedSectorInfo, rand abi.PoStRandomness, poStEpoch abi.ChainEpoch, nv abinetwork.Version) ([]builtin.PoStProof, error) //perm:read
}

View File

@ -627,7 +627,7 @@ type StorageMinerStruct struct {
ActorSectorSize func(p0 context.Context, p1 address.Address) (abi.SectorSize, error) `perm:"read"`
CheckProvable func(p0 context.Context, p1 abi.RegisteredPoStProof, p2 []storage.SectorRef, p3 bool) (map[abi.SectorNumber]string, error) `perm:"admin"`
CheckProvable func(p0 context.Context, p1 abi.RegisteredPoStProof, p2 []storage.SectorRef, p3 []bool, p4 bool) (map[abi.SectorNumber]string, error) `perm:"admin"`
ComputeProof func(p0 context.Context, p1 []builtin.ExtendedSectorInfo, p2 abi.PoStRandomness, p3 abi.ChainEpoch, p4 abinetwork.Version) ([]builtin.PoStProof, error) `perm:"read"`
@ -3760,14 +3760,14 @@ func (s *StorageMinerStub) ActorSectorSize(p0 context.Context, p1 address.Addres
return *new(abi.SectorSize), ErrNotSupported
}
func (s *StorageMinerStruct) CheckProvable(p0 context.Context, p1 abi.RegisteredPoStProof, p2 []storage.SectorRef, p3 bool) (map[abi.SectorNumber]string, error) {
func (s *StorageMinerStruct) CheckProvable(p0 context.Context, p1 abi.RegisteredPoStProof, p2 []storage.SectorRef, p3 []bool, p4 bool) (map[abi.SectorNumber]string, error) {
if s.Internal.CheckProvable == nil {
return *new(map[abi.SectorNumber]string), ErrNotSupported
}
return s.Internal.CheckProvable(p0, p1, p2, p3)
return s.Internal.CheckProvable(p0, p1, p2, p3, p4)
}
func (s *StorageMinerStub) CheckProvable(p0 context.Context, p1 abi.RegisteredPoStProof, p2 []storage.SectorRef, p3 bool) (map[abi.SectorNumber]string, error) {
func (s *StorageMinerStub) CheckProvable(p0 context.Context, p1 abi.RegisteredPoStProof, p2 []storage.SectorRef, p3 []bool, p4 bool) (map[abi.SectorNumber]string, error) {
return *new(map[abi.SectorNumber]string), ErrNotSupported
}

View File

@ -57,7 +57,7 @@ var (
FullAPIVersion0 = newVer(1, 5, 0)
FullAPIVersion1 = newVer(2, 2, 0)
MinerAPIVersion0 = newVer(1, 3, 0)
MinerAPIVersion0 = newVer(1, 4, 0)
WorkerAPIVersion0 = newVer(1, 5, 0)
)

Binary file not shown.

View File

@ -437,6 +437,7 @@ var provingCheckProvableCmd = &cli.Command{
}
var tocheck []storage.SectorRef
var update []bool
for _, info := range sectorInfos {
si := abi.SectorID{
Miner: abi.ActorID(mid),
@ -454,9 +455,10 @@ var provingCheckProvableCmd = &cli.Command{
ProofType: info.SealProof,
ID: si,
})
update = append(update, info.SectorKeyCID != nil)
}
bad, err := sapi.CheckProvable(ctx, info.WindowPoStProofType, tocheck, cctx.Bool("slow"))
bad, err := sapi.CheckProvable(ctx, info.WindowPoStProofType, tocheck, update, cctx.Bool("slow"))
if err != nil {
return err
}

View File

@ -338,6 +338,9 @@ Inputs:
"ProofType": 8
}
],
[
true
],
true
]
```

View File

@ -19,11 +19,11 @@ import (
// FaultTracker TODO: Track things more actively
type FaultTracker interface {
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error)
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error)
}
// CheckProvable returns unprovable sectors
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
var bad = make(map[abi.SectorID]string)
ssize, err := pp.SectorSize()
@ -32,72 +32,76 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
}
// TODO: More better checks
for _, sector := range sectors {
for i, sector := range sectors {
err := func() error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var fReplica string
var fCache string
locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}
if !locked {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}
lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
// temporary hack to make the check work with snapdeals
// will go away in https://github.com/filecoin-project/lotus/pull/7971
if lp.Sealed == "" || lp.Cache == "" {
// maybe it's update
if update[i] {
lockedUpdate, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}
if lockedUpdate {
lp, _, err = m.localStore.AcquireSector(ctx, sector, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
lp.Sealed, lp.Cache = lp.Update, lp.UpdateCache
if !lockedUpdate {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock on update replica", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}
lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector update replica in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
fReplica, fCache = lp.Update, lp.UpdateCache
} else {
locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}
if !locked {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}
lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
fReplica, fCache = lp.Sealed, lp.Cache
}
if lp.Sealed == "" || lp.Cache == "" {
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", lp.Cache, lp.Sealed)
if fReplica == "" || fCache == "" {
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", fReplica, "cache", fCache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", fCache, fReplica)
return nil
}
toCheck := map[string]int64{
lp.Sealed: 1,
filepath.Join(lp.Cache, "p_aux"): 0,
fReplica: 1,
filepath.Join(fCache, "p_aux"): 0,
}
addCachePathsForSectorSize(toCheck, lp.Cache, ssize)
addCachePathsForSectorSize(toCheck, fCache, ssize)
for p, sz := range toCheck {
st, err := os.Stat(p)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "err", err)
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", fReplica, "cache", fCache, "file", p, "err", err)
bad[sector.ID] = fmt.Sprintf("%s", err)
return nil
}
if sz != 0 {
if st.Size() != int64(ssize)*sz {
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", fReplica, "cache", fCache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
bad[sector.ID] = fmt.Sprintf("%s is wrong size (got %d, expect %d)", p, st.Size(), int64(ssize)*sz)
return nil
}
@ -118,14 +122,14 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
sector.ID.Number,
})
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("generating fallback challenges: %s", err)
return nil
}
commr, err := rg(ctx, sector.ID)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("getting commR: %s", err)
return nil
}
@ -136,12 +140,12 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
SectorNumber: sector.ID.Number,
SealedCID: commr,
},
CacheDirPath: lp.Cache,
CacheDirPath: fCache,
PoStProofType: wpp,
SealedSectorPath: lp.Sealed,
SealedSectorPath: fReplica,
}, ch.Challenges[sector.ID.Number])
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("generating vanilla proof: %s", err)
return nil
}

View File

@ -505,7 +505,7 @@ func (mgr *SectorMgr) Remove(ctx context.Context, sector storage.SectorRef) erro
return nil
}
func (mgr *SectorMgr) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, ids []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (mgr *SectorMgr) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, ids []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
bad := map[abi.SectorID]string{}
for _, sid := range ids {

View File

@ -1127,7 +1127,7 @@ func (sm *StorageMinerAPI) CreateBackup(ctx context.Context, fpath string) error
return backup(ctx, sm.DS, fpath)
}
func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []sto.SectorRef, expensive bool) (map[abi.SectorNumber]string, error) {
func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []sto.SectorRef, update []bool, expensive bool) (map[abi.SectorNumber]string, error) {
var rg storiface.RGetter
if expensive {
rg = func(ctx context.Context, id abi.SectorID) (cid.Cid, error) {
@ -1143,7 +1143,7 @@ func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredP
}
}
bad, err := sm.StorageMgr.CheckProvable(ctx, pp, sectors, rg)
bad, err := sm.StorageMgr.CheckProvable(ctx, pp, sectors, update, rg)
if err != nil {
return nil, err
}

View File

@ -206,6 +206,7 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B
sectors := make(map[abi.SectorNumber]struct{})
var tocheck []storage.SectorRef
var update []bool
for _, info := range sectorInfos {
sectors[info.SectorNumber] = struct{}{}
tocheck = append(tocheck, storage.SectorRef{
@ -215,9 +216,10 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B
Number: info.SectorNumber,
},
})
update = append(update, info.SectorKeyCID != nil)
}
bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, nil)
bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, update, nil)
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("checking provable sectors: %w", err)
}

View File

@ -168,7 +168,7 @@ func (m mockVerif) GenerateWinningPoStSectorChallenge(context.Context, abi.Regis
type mockFaultTracker struct {
}
func (m mockFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (m mockFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
// Returns "bad" sectors so just return empty map meaning all sectors are good
return map[abi.SectorID]string{}, nil
}