lotus/storage/sealer/faults.go

138 lines
3.5 KiB
Go
Raw Normal View History

package sealer
2020-05-16 21:03:29 +00:00
import (
"context"
2020-12-01 23:32:01 +00:00
"crypto/rand"
"fmt"
2022-03-29 01:19:11 +00:00
"sync"
2022-01-18 11:11:59 +00:00
2020-05-16 21:03:29 +00:00
"golang.org/x/xerrors"
2020-12-01 23:32:01 +00:00
ffi "github.com/filecoin-project/filecoin-ffi"
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/lotus/storage/sealer/storiface"
2020-05-16 21:03:29 +00:00
)
2020-08-16 10:40:35 +00:00
// FaultTracker TODO: Track things more actively
2020-05-16 21:03:29 +00:00
type FaultTracker interface {
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storiface.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error)
2020-05-16 21:03:29 +00:00
}
2020-08-16 10:40:35 +00:00
// CheckProvable returns unprovable sectors
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storiface.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
2022-03-29 01:19:11 +00:00
ctx, cancel := context.WithCancel(ctx)
defer cancel()
2022-01-14 13:11:04 +00:00
if rg == nil {
return nil, xerrors.Errorf("rg is nil")
}
2020-11-26 07:02:43 +00:00
var bad = make(map[abi.SectorID]string)
2022-03-29 01:19:11 +00:00
var badLk sync.Mutex
var postRand abi.PoStRandomness = make([]byte, abi.RandomnessLength)
_, _ = rand.Read(postRand)
postRand[31] &= 0x3f
limit := m.parallelCheckLimit
if limit <= 0 {
limit = len(sectors)
}
throttle := make(chan struct{}, limit)
addBad := func(s abi.SectorID, reason string) {
badLk.Lock()
bad[s] = reason
badLk.Unlock()
}
if m.partitionCheckTimeout > 0 {
var cancel2 context.CancelFunc
ctx, cancel2 = context.WithTimeout(ctx, m.partitionCheckTimeout)
defer cancel2()
}
2022-03-29 01:19:11 +00:00
var wg sync.WaitGroup
wg.Add(len(sectors))
2020-05-16 21:03:29 +00:00
for _, sector := range sectors {
2022-03-29 01:19:11 +00:00
select {
case throttle <- struct{}{}:
case <-ctx.Done():
addBad(sector.ID, fmt.Sprintf("waiting for check worker: %s", ctx.Err()))
wg.Done()
continue
2022-03-29 01:19:11 +00:00
}
go func(sector storiface.SectorRef) {
2022-03-29 01:19:11 +00:00
defer wg.Done()
defer func() {
<-throttle
}()
2020-06-08 16:47:59 +00:00
ctx, cancel := context.WithCancel(ctx)
defer cancel()
commr, update, err := rg(ctx, sector.ID)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", "err", err)
2022-03-29 01:19:11 +00:00
addBad(sector.ID, fmt.Sprintf("getting commR: %s", err))
return
}
toLock := storiface.FTSealed | storiface.FTCache
if update {
toLock = storiface.FTUpdate | storiface.FTUpdateCache
}
locked, err := m.index.StorageTryLock(ctx, sector.ID, toLock, storiface.FTNone)
2020-06-08 16:47:59 +00:00
if err != nil {
2022-03-29 01:19:11 +00:00
addBad(sector.ID, fmt.Sprintf("tryLock error: %s", err))
return
2020-06-08 16:47:59 +00:00
}
if !locked {
2020-11-19 15:48:28 +00:00
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
2022-03-29 01:19:11 +00:00
addBad(sector.ID, fmt.Sprint("can't acquire read lock"))
return
2020-06-08 16:47:59 +00:00
}
chore: build: Merge/v22 into 21 for 23 (#10702) * chore: update ffi to increase execution parallelism * Don't enforce walking receipt tree during compaction * fix: build: drop drand incentinet servers * chore: release lotus v1.20.4 * Apply suggestions from code review Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com> * feat: Introduce nv19 skeleton Update to go-state-types v0.11.0-alpha-1 Introduce dummy v11 actor bundles Make new actors adapters Add upgrade to Upgrade Schedules make jen Update to go-state-types v0.11.0-alpha-2 * feat: vm: switch to the new exec trace format (#10372) This is now "FVM" native. Changes include: 1. Don't treat "trace" messages like off-chain messages. E.g., don't include CIDs, versions, etc. 2. Include IPLD codecs where applicable. 3. Remove fields that aren't filled by the FVM (timing, some errors, code locations, etc.). * feat: implement FIP-0061 * Address review * Add and test the FIP-0061 migration * Update actors bundles to fip/20230406 * Update to go-state-types master * Update to actors v11.0.0-rc1 * - Update go state types - Keep current expiration defaults on creation, extension some tests - Update ffi * ffi experiment * Integration nv19 migration - Open splitstore in migration shed tool - Update state root version * Post rebase fixup * Fix * gen * nv19 invariant checking * Try fixig blockstore so bundle is loaded * Debug * Fix * Make butterfly upgrades happen * Another ffi experiment * Fix copy paste error * Actually schedule migration (#10656) Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * Butterfly artifacts * Set calibration net upgrade height * Review Response * Fix state tree version assert * Quick butterfly upgrade to sanity check (#10660) * Quick butterfly upgrade to sanity check * Update butterfly artifacts * Revert fake fix * Give butterfly net correct genesis * Butterfly artifacts * Give time before upgrade --------- Co-authored-by: zenground0 
<ZenGround0@users.noreply.github.com> * chore:releasepolish v1.22 release (#10666) * Update butterfly artifacts * register actors v11 * Update calibration upgrade time * State inspection shed cmds * Fix * make gen * Fix swallowed errors * Lint fixup --------- Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * v1.22.0-rc3 * bundle fix * Feat/expedite nv19 (#10681) * Update go-state-types * Modify upgrade schedule and params * Revert fip 0052 * Update gst * docsgen * fast butterfly migration to validate migration * Correct epoch to match specified date * Update actors v11 * Update changelog build version * Update butterfly artifacts * Fix lotus-miner init to work after upgrade --------- Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * fix:deps:stable ffi for stable release (#10698) * Point to stable ffi for stable lotus release * go mod tidy --------- Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * Update CHANGELOG.md Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com> --------- Co-authored-by: Aayush Rajasekaran <arajasek94@gmail.com> Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com> Co-authored-by: Steven Allen <steven@stebalien.com> Co-authored-by: jennijuju <jiayingw703@gmail.com>
2023-04-19 22:40:18 +00:00
ch, err := ffi.GeneratePoStFallbackSectorChallenges(pp, sector.ID.Miner, postRand, []abi.SectorNumber{
2022-01-14 13:11:04 +00:00
sector.ID.Number,
})
2020-05-16 21:03:29 +00:00
if err != nil {
2022-01-14 13:11:04 +00:00
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "err", err)
2022-03-29 01:19:11 +00:00
addBad(sector.ID, fmt.Sprintf("generating fallback challenges: %s", err))
return
2020-05-16 21:03:29 +00:00
}
vctx := ctx
if m.singleCheckTimeout > 0 {
var cancel2 context.CancelFunc
vctx, cancel2 = context.WithTimeout(ctx, m.singleCheckTimeout)
defer cancel2()
}
_, err = m.storage.GenerateSingleVanillaProof(vctx, sector.ID.Miner, storiface.PostSectorChallenge{
2022-01-14 13:11:04 +00:00
SealProof: sector.ProofType,
SectorNumber: sector.ID.Number,
SealedCID: commr,
Challenge: ch.Challenges[sector.ID.Number],
2022-01-18 10:25:04 +00:00
Update: update,
chore: build: Merge/v22 into 21 for 23 (#10702) * chore: update ffi to increase execution parallelism * Don't enforce walking receipt tree during compaction * fix: build: drop drand incentinet servers * chore: release lotus v1.20.4 * Apply suggestions from code review Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com> * feat: Introduce nv19 skeleton Update to go-state-types v0.11.0-alpha-1 Introduce dummy v11 actor bundles Make new actors adapters Add upgrade to Upgrade Schedules make jen Update to go-state-types v0.11.0-alpha-2 * feat: vm: switch to the new exec trace format (#10372) This is now "FVM" native. Changes include: 1. Don't treat "trace" messages like off-chain messages. E.g., don't include CIDs, versions, etc. 2. Include IPLD codecs where applicable. 3. Remove fields that aren't filled by the FVM (timing, some errors, code locations, etc.). * feat: implement FIP-0061 * Address review * Add and test the FIP-0061 migration * Update actors bundles to fip/20230406 * Update to go-state-types master * Update to actors v11.0.0-rc1 * - Update go state types - Keep current expiration defaults on creation, extension some tests - Update ffi * ffi experiment * Integration nv19 migration - Open splitstore in migration shed tool - Update state root version * Post rebase fixup * Fix * gen * nv19 invariant checking * Try fixig blockstore so bundle is loaded * Debug * Fix * Make butterfly upgrades happen * Another ffi experiment * Fix copy paste error * Actually schedule migration (#10656) Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * Butterfly artifacts * Set calibration net upgrade height * Review Response * Fix state tree version assert * Quick butterfly upgrade to sanity check (#10660) * Quick butterfly upgrade to sanity check * Update butterfly artifacts * Revert fake fix * Give butterfly net correct genesis * Butterfly artifacts * Give time before upgrade --------- Co-authored-by: zenground0 
<ZenGround0@users.noreply.github.com> * chore:releasepolish v1.22 release (#10666) * Update butterfly artifacts * register actors v11 * Update calibration upgrade time * State inspection shed cmds * Fix * make gen * Fix swallowed errors * Lint fixup --------- Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * v1.22.0-rc3 * bundle fix * Feat/expedite nv19 (#10681) * Update go-state-types * Modify upgrade schedule and params * Revert fip 0052 * Update gst * docsgen * fast butterfly migration to validate migration * Correct epoch to match specified date * Update actors v11 * Update changelog build version * Update butterfly artifacts * Fix lotus-miner init to work after upgrade --------- Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * fix:deps:stable ffi for stable release (#10698) * Point to stable ffi for stable lotus release * go mod tidy --------- Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> * Update CHANGELOG.md Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com> --------- Co-authored-by: Aayush Rajasekaran <arajasek94@gmail.com> Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com> Co-authored-by: Jiaying Wang <42981373+jennijuju@users.noreply.github.com> Co-authored-by: Steven Allen <steven@stebalien.com> Co-authored-by: jennijuju <jiayingw703@gmail.com>
2023-04-19 22:40:18 +00:00
}, pp)
2022-01-14 13:11:04 +00:00
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "err", err)
2022-03-29 01:19:11 +00:00
addBad(sector.ID, fmt.Sprintf("generating vanilla proof: %s", err))
return
2020-12-01 23:32:01 +00:00
}
2022-03-29 01:19:11 +00:00
}(sector)
2020-05-16 21:03:29 +00:00
}
2022-03-29 19:28:37 +00:00
wg.Wait()
2020-05-16 21:03:29 +00:00
return bad, nil
}
// Compile-time check that *Manager implements FaultTracker. A typed nil
// pointer is enough for the check and avoids constructing a Manager value.
var _ FaultTracker = (*Manager)(nil)