lotus/storage/sealer/faults.go
ZenGround0 327760acff
chore: build: Merge/v22 into 21 for 23 (#10702)
* chore: update ffi to increase execution parallelism

* Don't enforce walking receipt tree during compaction

* fix: build: drop drand incentinet servers

* chore: release lotus v1.20.4

* Apply suggestions from code review

Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com>

* feat: Introduce nv19 skeleton

Update to go-state-types v0.11.0-alpha-1

Introduce dummy v11 actor bundles

Make new actors adapters

Add upgrade to Upgrade Schedules

make jen

Update to go-state-types v0.11.0-alpha-2

* feat: vm: switch to the new exec trace format (#10372)

This is now "FVM" native. Changes include:

1. Don't treat "trace" messages like off-chain messages. E.g., don't
include CIDs, versions, etc.
2. Include IPLD codecs where applicable.
3. Remove fields that aren't filled by the FVM (timing, some errors,
code locations, etc.).

* feat: implement FIP-0061

* Address review

* Add and test the FIP-0061 migration

* Update actors bundles to fip/20230406

* Update to go-state-types master

* Update to actors v11.0.0-rc1

* - Update go state types
- Keep current expiration defaults on creation, extension some tests
- Update ffi

* ffi experiment

* Integration nv19 migration

- Open splitstore in migration shed tool
- Update state root version

* Post rebase fixup

* Fix

* gen

* nv19 invariant checking

* Try fixing blockstore so bundle is loaded

* Debug

* Fix

* Make butterfly upgrades happen

* Another ffi experiment

* Fix copy paste error

* Actually schedule migration (#10656)

Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>

* Butterfly artifacts

* Set calibration net upgrade height

* Review Response

* Fix state tree version assert

* Quick butterfly upgrade to sanity check (#10660)

* Quick butterfly upgrade to sanity check

* Update butterfly artifacts

* Revert fake fix

* Give butterfly net correct genesis

* Butterfly artifacts

* Give time before upgrade

---------

Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>

* chore:releasepolish v1.22 release (#10666)

* Update butterfly artifacts

* register actors v11

* Update calibration upgrade time

* State inspection shed cmds

* Fix

* make gen

* Fix swallowed errors

* Lint fixup

---------

Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>

* v1.22.0-rc3

* bundle fix

* Feat/expedite nv19 (#10681)

* Update go-state-types

* Modify upgrade schedule and params

* Revert fip 0052

* Update gst

* docsgen

* fast butterfly migration to validate migration

* Correct epoch to match specified date

* Update actors v11

* Update changelog build version

* Update butterfly artifacts

* Fix lotus-miner init to work after upgrade

---------

Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>

* fix:deps:stable ffi for stable release  (#10698)

* Point to stable ffi for stable lotus release

* go mod tidy

---------

Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>

* Update CHANGELOG.md

Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com>

---------

Co-authored-by: Aayush Rajasekaran <arajasek94@gmail.com>
Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>
Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com>
Co-authored-by: Steven Allen <steven@stebalien.com>
Co-authored-by: jennijuju <jiayingw703@gmail.com>
2023-04-19 18:40:18 -04:00

138 lines
3.5 KiB
Go

package sealer
import (
"context"
"crypto/rand"
"fmt"
"sync"
"golang.org/x/xerrors"
ffi "github.com/filecoin-project/filecoin-ffi"
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/lotus/storage/sealer/storiface"
)
// FaultTracker is implemented by components that can report which of a set of
// sectors cannot currently be proven. *Manager in this file is one
// implementation.
//
// TODO: Track things more actively
type FaultTracker interface {
// CheckProvable checks the given sectors for the PoSt proof type pp and
// returns the ones found to be unprovable. rg is used to look up per-sector
// data needed for the check. In Manager's implementation the returned map is
// keyed by sector ID and holds a human-readable reason for each failing
// sector; sectors that pass are not present in the map.
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storiface.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error)
}
// CheckProvable returns unprovable sectors.
//
// Every sector in sectors is checked in its own goroutine. A check consists of:
//  1. fetching the sector's commR (and whether it is an update) through rg,
//  2. attempting a try-lock on the sealed+cache (or update+update-cache) files
//     via m.index.StorageTryLock,
//  3. generating fallback PoSt challenges for the sector from random bytes,
//  4. generating a single vanilla proof for those challenges.
//
// A sector failing any step is recorded in the returned map together with a
// reason string; sectors that pass are absent from the map.
//
// The number of concurrently running checks is capped by m.parallelCheckLimit
// (all sectors at once when it is <= 0). When > 0, m.partitionCheckTimeout
// bounds the whole call and m.singleCheckTimeout bounds each vanilla proof.
// Sectors that cannot get a worker slot before ctx is done are reported as bad.
//
// The returned error is non-nil only when rg is nil; per-sector failures are
// reported exclusively through the map.
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storiface.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
	if rg == nil {
		return nil, xerrors.Errorf("rg is nil")
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	var bad = make(map[abi.SectorID]string)
	var badLk sync.Mutex

	// Best effort: the randomness only seeds the challenge positions for this
	// local check. If reading it fails the buffer stays (partly) zeroed and the
	// check proceeds with that, rather than failing every sector.
	var postRand abi.PoStRandomness = make([]byte, abi.RandomnessLength)
	_, _ = rand.Read(postRand)
	// Clear the top two bits of the last byte.
	// NOTE(review): presumably this keeps the value inside the range the proofs
	// library accepts as randomness — confirm against filecoin-ffi.
	postRand[31] &= 0x3f

	limit := m.parallelCheckLimit
	if limit <= 0 {
		limit = len(sectors)
	}
	// throttle is a counting semaphore: one token per running check.
	throttle := make(chan struct{}, limit)

	// addBad records a failing sector; it is called from many goroutines.
	addBad := func(s abi.SectorID, reason string) {
		badLk.Lock()
		bad[s] = reason
		badLk.Unlock()
	}

	if m.partitionCheckTimeout > 0 {
		var cancel2 context.CancelFunc
		ctx, cancel2 = context.WithTimeout(ctx, m.partitionCheckTimeout)
		defer cancel2()
	}

	var wg sync.WaitGroup
	wg.Add(len(sectors))

	for _, sector := range sectors {
		select {
		case throttle <- struct{}{}:
		case <-ctx.Done():
			// No worker slot became free before cancellation/timeout.
			addBad(sector.ID, fmt.Sprintf("waiting for check worker: %s", ctx.Err()))
			wg.Done()
			continue
		}

		go func(sector storiface.SectorRef) {
			defer wg.Done()
			defer func() {
				<-throttle
			}()

			// Per-sector context, cancelled as soon as this check returns.
			// NOTE(review): the lock taken below has no explicit release here;
			// presumably it is tied to ctx and dropped on cancel — confirm
			// against the index implementation.
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			commr, update, err := rg(ctx, sector.ID)
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("getting commR: %s", err))
				return
			}

			toLock := storiface.FTSealed | storiface.FTCache
			if update {
				toLock = storiface.FTUpdate | storiface.FTUpdateCache
			}

			locked, err := m.index.StorageTryLock(ctx, sector.ID, toLock, storiface.FTNone)
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: tryLock error", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("tryLock error: %s", err))
				return
			}
			if !locked {
				log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
				addBad(sector.ID, "can't acquire read lock")
				return
			}

			ch, err := ffi.GeneratePoStFallbackSectorChallenges(pp, sector.ID.Miner, postRand, []abi.SectorNumber{
				sector.ID.Number,
			})
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("generating fallback challenges: %s", err))
				return
			}

			// Optionally bound the proof generation for this one sector.
			vctx := ctx
			if m.singleCheckTimeout > 0 {
				var cancel2 context.CancelFunc
				vctx, cancel2 = context.WithTimeout(ctx, m.singleCheckTimeout)
				defer cancel2()
			}

			// Only the error matters here; the proof itself is discarded.
			_, err = m.storage.GenerateSingleVanillaProof(vctx, sector.ID.Miner, storiface.PostSectorChallenge{
				SealProof:    sector.ProofType,
				SectorNumber: sector.ID.Number,
				SealedCID:    commr,
				Challenge:    ch.Challenges[sector.ID.Number],
				Update:       update,
			}, pp)
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("generating vanilla proof: %s", err))
				return
			}
		}(sector)
	}

	wg.Wait()

	return bad, nil
}
// Compile-time assertion that *Manager implements FaultTracker. A typed nil
// pointer is enough for the check and avoids constructing a Manager value at
// package initialization.
var _ FaultTracker = (*Manager)(nil)