lotus/curiosrc/window/faults_simple.go

package window

import (
	"context"
	"crypto/rand"
	"fmt"
	"sync"
	"time"

	"golang.org/x/xerrors"

	ffi "github.com/filecoin-project/filecoin-ffi"
	logging "github.com/filecoin-project/go-log/v2"
	"github.com/filecoin-project/go-state-types/abi"

	"github.com/filecoin-project/lotus/storage/paths"
	"github.com/filecoin-project/lotus/storage/sealer/storiface"
)

// log is the package-level logger. In the lotus repo it is declared in a
// sibling file of this package; it is declared here so the file stands alone.
var log = logging.Logger("curio/window")
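
// SimpleFaultTracker checks sector provability by generating a single
// vanilla PoSt proof per sector, with bounded parallelism and timeouts.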
type SimpleFaultTracker struct {
	storage paths.Store
	index   paths.SectorIndex

	parallelCheckLimit    int // todo live config?
	singleCheckTimeout    time.Duration
	partitionCheckTimeout time.Duration
}
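
// NewSimpleFaultTracker creates a tracker. A parallelCheckLimit <= 0 means
// no throttling: all sectors in a partition are checked concurrently.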
func NewSimpleFaultTracker(storage paths.Store, index paths.SectorIndex,
	parallelCheckLimit int, singleCheckTimeout time.Duration, partitionCheckTimeout time.Duration) *SimpleFaultTracker {
	return &SimpleFaultTracker{
		storage: storage,
		index:   index,

		parallelCheckLimit:    parallelCheckLimit,
		singleCheckTimeout:    singleCheckTimeout,
		partitionCheckTimeout: partitionCheckTimeout,
	}
}
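
// CheckProvable checks whether the given sectors can currently be proven.
// For each sector it resolves the sealed CID via rg, takes a read lock on
// the sector files, and generates one vanilla PoSt proof as a liveness
// check. It returns a map from bad sector IDs to failure reasons.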
func (m *SimpleFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storiface.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	if rg == nil {
		return nil, xerrors.Errorf("rg is nil")
	}
	var bad = make(map[abi.SectorID]string)
	var badLk sync.Mutex
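
	// Throwaway randomness for the fallback challenges; the last byte is
	// masked so the value fits the field element encoding the proofs expect.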
	var postRand abi.PoStRandomness = make([]byte, abi.RandomnessLength)
	_, _ = rand.Read(postRand)
	postRand[31] &= 0x3f

	limit := m.parallelCheckLimit
	if limit <= 0 {
		limit = len(sectors)
	}
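
	// Buffered channel used as a semaphore to bound concurrent checks.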
	throttle := make(chan struct{}, limit)

	addBad := func(s abi.SectorID, reason string) {
		badLk.Lock()
		bad[s] = reason
		badLk.Unlock()
	}

	if m.partitionCheckTimeout > 0 {
		var cancel2 context.CancelFunc
		ctx, cancel2 = context.WithTimeout(ctx, m.partitionCheckTimeout)
		defer cancel2()
	}
	var wg sync.WaitGroup
	wg.Add(len(sectors))

	for _, sector := range sectors {
		select {
		case throttle <- struct{}{}:
		case <-ctx.Done():
			addBad(sector.ID, fmt.Sprintf("waiting for check worker: %s", ctx.Err()))
			wg.Done()
			continue
		}
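
		// Check each sector in its own goroutine; the throttle slot is
		// released when the check returns.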
		go func(sector storiface.SectorRef) {
			defer wg.Done()
			defer func() {
				<-throttle
			}()
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			commr, update, err := rg(ctx, sector.ID)
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("getting commR: %s", err))
				return
			}
			toLock := storiface.FTSealed | storiface.FTCache
			if update {
				toLock = storiface.FTUpdate | storiface.FTUpdateCache
			}

			locked, err := m.index.StorageTryLock(ctx, sector.ID, toLock, storiface.FTNone)
			if err != nil {
				addBad(sector.ID, fmt.Sprintf("tryLock error: %s", err))
				return
			}

			if !locked {
				log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
				addBad(sector.ID, "can't acquire read lock")
				return
			}
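
			// Derive this sector's fallback PoSt challenges from the
			// throwaway randomness; only this one sector is requested.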
			ch, err := ffi.GeneratePoStFallbackSectorChallenges(pp, sector.ID.Miner, postRand, []abi.SectorNumber{
				sector.ID.Number,
			})
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("generating fallback challenges: %s", err))
				return
			}
			vctx := ctx
			if m.singleCheckTimeout > 0 {
				var cancel2 context.CancelFunc
				vctx, cancel2 = context.WithTimeout(ctx, m.singleCheckTimeout)
				defer cancel2()
			}
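
			// Generate one vanilla proof for the sector; this reads the
			// actual sector data, so a failure (or timeout) here marks
			// the sector as bad.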
			_, err = m.storage.GenerateSingleVanillaProof(vctx, sector.ID.Miner, storiface.PostSectorChallenge{
				SealProof:    sector.ProofType,
				SectorNumber: sector.ID.Number,
				SealedCID:    commr,
				Challenge:    ch.Challenges[sector.ID.Number],
				Update:       update,
			}, pp)
			if err != nil {
				log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "err", err)
				addBad(sector.ID, fmt.Sprintf("generating vanilla proof: %s", err))
				return
			}
		}(sector)
	}

	wg.Wait()

	return bad, nil
}
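
// Usage sketch (not part of the original file): wiring the tracker into a
// window PoSt check. remote, idx, pp, sectors and rg are assumed to come
// from the caller; the limits shown are illustrative, not lotus defaults.
//
//	tracker := NewSimpleFaultTracker(remote, idx,
//		32,             // up to 32 sectors checked in parallel
//		time.Minute,    // per-sector proof timeout
//		20*time.Minute, // whole-partition timeout
//	)
//	bad, err := tracker.CheckProvable(ctx, pp, sectors, rg)
//	if err != nil {
//		return err
//	}
//	for id, reason := range bad {
//		log.Warnw("skipping unprovable sector", "sector", id, "reason", reason)
//	}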