Merge pull request #9111 from filecoin-project/asr/fix-rec-setcor-limit
fix: post: restrict recoveries per deadline
Commit: c0d2f249bc

@@ -186,3 +186,117 @@ func TestWindowPostNoPreChecks(t *testing.T) {
     sectors = p.MinerPower.RawBytePower.Uint64() / uint64(ssz)
     require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-2+1, int(sectors)) // -2 not recovered sectors + 1 just pledged
 }
+
+func TestWindowPostMaxSectorsRecoveryConfig(t *testing.T) {
+    oldVal := wdpost.RecoveringSectorLimit
+    defer func() {
+        wdpost.RecoveringSectorLimit = oldVal
+    }()
+    wdpost.RecoveringSectorLimit = 1
+
+    ctx, cancel := context.WithCancel(context.Background())
+    defer cancel()
+
+    client, miner, ens := kit.EnsembleMinimal(t,
+        kit.LatestActorsAt(-1),
+        kit.MockProofs())
+    ens.InterconnectAll().BeginMining(2 * time.Millisecond)
+
+    nSectors := 10
+
+    miner.PledgeSectors(ctx, nSectors, 0, nil)
+
+    maddr, err := miner.ActorAddress(ctx)
+    require.NoError(t, err)
+    di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
+    require.NoError(t, err)
+
+    mid, err := address.IDFromAddress(maddr)
+    require.NoError(t, err)
+
+    t.Log("Running one proving period")
+    waitUntil := di.Open + di.WPoStProvingPeriod
+    t.Logf("End for head.Height > %d", waitUntil)
+
+    ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
+    t.Logf("Now head.Height = %d", ts.Height())
+
+    p, err := client.StateMinerPower(ctx, maddr, types.EmptyTSK)
+    require.NoError(t, err)
+
+    ssz, err := miner.ActorSectorSize(ctx, maddr)
+    require.NoError(t, err)
+
+    require.Equal(t, p.MinerPower, p.TotalPower)
+    require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(nSectors+kit.DefaultPresealsPerBootstrapMiner)))
+
+    t.Log("Drop some sectors")
+
+    // Drop 2 sectors from deadline 2 partition 0 (full partition / deadline)
+    parts, err := client.StateMinerPartitions(ctx, maddr, 2, types.EmptyTSK)
+    require.NoError(t, err)
+    require.Greater(t, len(parts), 0)
+
+    secs := parts[0].AllSectors
+    n, err := secs.Count()
+    require.NoError(t, err)
+    require.Equal(t, uint64(2), n)
+
+    // Drop the partition
+    err = secs.ForEach(func(sid uint64) error {
+        return miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(storiface.SectorRef{
+            ID: abi.SectorID{
+                Miner:  abi.ActorID(mid),
+                Number: abi.SectorNumber(sid),
+            },
+        }, true)
+    })
+    require.NoError(t, err)
+
+    di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
+    require.NoError(t, err)
+
+    t.Log("Go through another PP, wait for sectors to become faulty")
+    waitUntil = di.Open + di.WPoStProvingPeriod
+    t.Logf("End for head.Height > %d", waitUntil)
+
+    ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
+    t.Logf("Now head.Height = %d", ts.Height())
+
+    p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
+    require.NoError(t, err)
+
+    require.Equal(t, p.MinerPower, p.TotalPower)
+
+    sectors := p.MinerPower.RawBytePower.Uint64() / uint64(ssz)
+    require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-2, int(sectors)) // -2 just removed sectors
+
+    t.Log("Make the sectors recoverable")
+
+    err = secs.ForEach(func(sid uint64) error {
+        return miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(storiface.SectorRef{
+            ID: abi.SectorID{
+                Miner:  abi.ActorID(mid),
+                Number: abi.SectorNumber(sid),
+            },
+        }, false)
+    })
+    require.NoError(t, err)
+
+    di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
+    require.NoError(t, err)
+
+    waitUntil = di.Open + di.WPoStProvingPeriod + 200
+    t.Logf("End for head.Height > %d", waitUntil)
+
+    ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
+    t.Logf("Now head.Height = %d", ts.Height())
+
+    p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
+    require.NoError(t, err)
+
+    require.Equal(t, p.MinerPower, p.TotalPower)
+
+    sectors = p.MinerPower.RawBytePower.Uint64() / uint64(ssz)
+    require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-1, int(sectors)) // -1 not recovered sector
+}
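
Note that the test overrides the exported wdpost.RecoveringSectorLimit directly rather than setting LOTUS_RECOVERING_SECTOR_LIMIT: the environment variable is only read once, in the package's init(), which has already run by the time the test body executes. A hypothetical helper (not part of this PR) that packages the save/override/restore dance used above:

// Hypothetical test helper, not in the PR; assumes the same imports as the
// test file above (testing, wdpost). Scopes an override of the package-level
// limit to one test; t.Cleanup restores the old value even on failure.
func withRecoveringSectorLimit(t *testing.T, limit uint64) {
    old := wdpost.RecoveringSectorLimit
    t.Cleanup(func() { wdpost.RecoveringSectorLimit = old })
    wdpost.RecoveringSectorLimit = limit
}

Because such a helper mutates package-level state, tests that use it cannot safely run with t.Parallel().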

@@ -2,6 +2,7 @@ package wdpost
 
 import (
     "context"
+    "math"
     "os"
     "strconv"
 
@@ -21,12 +22,12 @@ import (
     "github.com/filecoin-project/lotus/chain/types"
 )
 
-var recoveringSectorLimit int64 = 0
+var RecoveringSectorLimit uint64 = 0
 
 func init() {
     if rcl := os.Getenv("LOTUS_RECOVERING_SECTOR_LIMIT"); rcl != "" {
         var err error
-        recoveringSectorLimit, err = strconv.ParseInt(rcl, 10, 64)
+        RecoveringSectorLimit, err = strconv.ParseUint(rcl, 10, 64)
         if err != nil {
             log.Errorw("parsing LOTUS_RECOVERING_SECTOR_LIMIT", "error", err)
         }
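
Alongside the rename to an exported identifier, the parse moves from ParseInt to ParseUint, which makes the "limit is never negative" invariant explicit in the type: a negative value now fails to parse, and the limit stays at its zero ("no limit") default. A standalone sketch of the two cases:

package main

import (
    "fmt"
    "strconv"
)

func main() {
    // Well-formed values parse as before.
    v, err := strconv.ParseUint("2048", 10, 64)
    fmt.Println(v, err) // 2048 <nil>

    // A negative value is now a parse error; v stays 0, so the limit
    // remains disabled rather than going negative.
    v, err = strconv.ParseUint("-1", 10, 64)
    fmt.Println(v, err) // 0 strconv.ParseUint: parsing "-1": invalid syntax
}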

@@ -92,6 +93,31 @@ func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint6
             continue
         }
 
+        // rules to follow if we have indicated that we don't want to recover more than X sectors in a deadline
+        if RecoveringSectorLimit > 0 {
+            // something weird happened, break because we can't recover any more
+            if RecoveringSectorLimit < totalSectorsToRecover {
+                log.Warnf("accepted more recoveries (%d) than RecoveringSectorLimit (%d)", totalSectorsToRecover, RecoveringSectorLimit)
+                break
+            }
+
+            maxNewRecoverable := RecoveringSectorLimit - totalSectorsToRecover
+
+            // we need to trim the recover bitfield
+            if recoveredCount > maxNewRecoverable {
+                recoverySlice, err := recovered.All(math.MaxUint64)
+                if err != nil {
+                    log.Errorw("failed to slice recovery bitfield, breaking out of recovery loop", err)
+                    break
+                }
+
+                log.Warnf("only adding %d sectors to respect RecoveringSectorLimit %d", maxNewRecoverable, RecoveringSectorLimit)
+
+                recovered = bitfield.NewFromSet(recoverySlice[:maxNewRecoverable])
+                recoveredCount = maxNewRecoverable
+            }
+        }
+
         // respect user config if set
         if s.maxPartitionsPerRecoveryMessage > 0 &&
             len(batchedRecoveryDecls[len(batchedRecoveryDecls)-1]) >= s.maxPartitionsPerRecoveryMessage {
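
The trimming step leans on two go-bitfield operations: All expands the bitfield into an ascending slice of sector numbers (math.MaxUint64 acts as a "no cap" bound), and NewFromSet rebuilds a bitfield from the kept prefix. A minimal standalone sketch, with made-up sector numbers:

package main

import (
    "fmt"
    "math"

    "github.com/filecoin-project/go-bitfield"
)

func main() {
    // Four recoverable sectors, but only room for two under the limit.
    recovered := bitfield.NewFromSet([]uint64{4, 9, 17, 23})
    var maxNewRecoverable uint64 = 2

    // Expand to a sorted slice of sector numbers; MaxUint64 = no size cap.
    recoverySlice, err := recovered.All(math.MaxUint64)
    if err != nil {
        panic(err)
    }

    // Keep only the first maxNewRecoverable sectors (4 and 9 here).
    trimmed := bitfield.NewFromSet(recoverySlice[:maxNewRecoverable])

    n, _ := trimmed.Count()
    fmt.Println(n) // 2
}

Since All returns set bits in ascending order and the slice keeps the prefix, the sectors dropped by the trim are simply those with the highest sector numbers.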

@@ -106,9 +132,9 @@ func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint6
 
         totalSectorsToRecover += recoveredCount
 
-        if recoveringSectorLimit > 0 && int64(totalSectorsToRecover) >= recoveringSectorLimit {
+        if RecoveringSectorLimit > 0 && totalSectorsToRecover >= RecoveringSectorLimit {
             log.Errorf("reached recovering sector limit %d, only marking %d sectors for recovery now",
-                recoveringSectorLimit,
+                RecoveringSectorLimit,
                 totalSectorsToRecover)
             break
         }
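
Taken together, the two hunks give the cap three outcomes per partition: accept everything while under the limit, trim to the remaining headroom, or stop walking partitions once the running total reaches the limit. As an illustrative restatement only (this refactor is not in the PR), the decision can be written as a pure function:

// Illustrative only, not code from the PR. limit == 0 means "no limit".
// alreadyAccepted is the running totalSectorsToRecover; candidates is the
// recoveredCount for the current partition. ok == false tells the caller
// to break out of the partition loop.
func allowedRecoveries(limit, alreadyAccepted, candidates uint64) (allowed uint64, ok bool) {
    if limit == 0 {
        return candidates, true
    }
    if alreadyAccepted >= limit {
        return 0, false // limit already reached (or, oddly, exceeded)
    }
    if room := limit - alreadyAccepted; candidates > room {
        return room, true // trim to the remaining headroom
    }
    return candidates, true
}

The PR keeps the two checks apart because the trim has to happen before the recovery declaration is batched, while the stop check can only run after recoveredCount has been folded into the running total.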