Merge pull request #8959 from filecoin-project/feat/post-check-config
sealer: Config for disabling builtin PoSt / PoSt pre-checks
Commit 75d78de336
@@ -958,6 +958,11 @@ workflows:
          suite: itest-verifreg
          target: "./itests/verifreg_test.go"

      - test:
          name: test-itest-wdpost_check_config
          suite: itest-wdpost_check_config
          target: "./itests/wdpost_check_config_test.go"

      - test:
          name: test-itest-wdpost_dispute
          suite: itest-wdpost_dispute
@@ -968,6 +973,11 @@ workflows:
          suite: itest-wdpost
          target: "./itests/wdpost_test.go"

      - test:
          name: test-itest-wdpost_worker_config
          suite: itest-wdpost_worker_config
          target: "./itests/wdpost_worker_config_test.go"

      - test:
          name: test-itest-worker
          suite: itest-worker
@@ -15,6 +15,9 @@


[Backup]
  # When set to true disables metadata log (.lotus/kvlog). This can save disk
  # space by reducing metadata redundancy.
  #
  # Note that in case of metadata corruption it might be much harder to recover
  # your node if metadata log is disabled
  #
@@ -15,6 +15,9 @@


[Backup]
  # When set to true disables metadata log (.lotus/kvlog). This can save disk
  # space by reducing metadata redundancy.
  #
  # Note that in case of metadata corruption it might be much harder to recover
  # your node if metadata log is disabled
  #
@@ -310,11 +313,68 @@

[Proving]
  # Maximum number of sector checks to run in parallel. (0 = unlimited)
  #
  # WARNING: Setting this value too high may make the node crash by running out of stack
  # WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due
  # to late submission.
  #
  # After changing this option, confirm that the new value works in your setup by invoking
  # 'lotus-miner proving compute window-post 0'
  #
  # type: int
  # env var: LOTUS_PROVING_PARALLELCHECKLIMIT
  #ParallelCheckLimit = 128

  # Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present.
  #
  # WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need
  # to be recovered. Before enabling this option, make sure your PoSt workers work correctly.
  #
  # After changing this option, confirm that the new value works in your setup by invoking
  # 'lotus-miner proving compute window-post 0'
  #
  # type: bool
  # env var: LOTUS_PROVING_DISABLEBUILTINWINDOWPOST
  #DisableBuiltinWindowPoSt = false

  # Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present.
  #
  # WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards.
  # Before enabling this option, make sure your PoSt workers work correctly.
  #
  # type: bool
  # env var: LOTUS_PROVING_DISABLEBUILTINWINNINGPOST
  #DisableBuiltinWinningPoSt = false

  # Disable WindowPoSt provable sector readability checks.
  #
  # In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges
  # from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as
  # we're only interested in checking that sector data can be read.
  #
  # When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process
  # can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by
  # the builtin logic not skipping snark computation when some sectors need to be skipped.
  #
  # When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and
  # if challenges for some sectors aren't readable, those sectors will just get skipped.
  #
  # Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter
  # time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should
  # be negligible.
  #
  # NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers.
  #
  # NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is
  # sent to the chain
  #
  # After changing this option, confirm that the new value works in your setup by invoking
  # 'lotus-miner proving compute window-post 0'
  #
  # type: bool
  # env var: LOTUS_PROVING_DISABLEWDPOSTPRECHECKS
  #DisableWDPoStPreChecks = false


[Sealing]
  # Upper bound on how many sectors can be waiting for more deals to be packed in it before it begins sealing at any given time.
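Editor's note for orientation: the three new [Proving] options above map onto fields of config.ProvingConfig and are consumed in two places in the changes below — the sealer manager (via StorageMiner.StorageManager()) and the window PoSt scheduler (via modules.WindowPostScheduler). The Go sketch that follows is modeled on the new itests in this PR and is illustrative only; in a normal deployment these values come from the [Proving] section of the miner config or the LOTUS_PROVING_* environment variables documented above.

package example

// Illustrative wiring only (editor's sketch, not part of the PR): mirrors how
// the new itests construct a miner node with a customized ProvingConfig.
import (
	"github.com/filecoin-project/lotus/node"
	"github.com/filecoin-project/lotus/node/config"
	"github.com/filecoin-project/lotus/node/modules"
	"github.com/filecoin-project/lotus/storage/sealer"
	"github.com/filecoin-project/lotus/storage/wdpost"
)

// provingOverrides hands a ProvingConfig to both consumers: the sealer manager
// config (which carries the DisableBuiltin* flags) and the WindowPoSt scheduler
// (which additionally reads DisableWDPoStPreChecks).
func provingOverrides(pc config.ProvingConfig) []node.Option {
	cfg := config.DefaultStorageMiner()
	cfg.Proving = pc

	return []node.Option{
		node.Override(new(sealer.Config), func() sealer.Config {
			// StorageManager() copies the Proving flags into sealer.Config,
			// as shown in the config change further down.
			return cfg.StorageManager()
		}),
		node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(
			cfg.Fees,
			cfg.Proving,
		)),
	}
}

Used with the itest kit, options like these are passed through kit.ConstructorOpts, exactly as the new test files below do.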
itests/wdpost_check_config_test.go (new file, 188 lines)
@@ -0,0 +1,188 @@
package itests

import (
	"context"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/filecoin-project/go-address"
	"github.com/filecoin-project/go-state-types/abi"

	"github.com/filecoin-project/lotus/chain/types"
	"github.com/filecoin-project/lotus/itests/kit"
	"github.com/filecoin-project/lotus/node"
	"github.com/filecoin-project/lotus/node/config"
	"github.com/filecoin-project/lotus/node/impl"
	"github.com/filecoin-project/lotus/node/modules"
	"github.com/filecoin-project/lotus/storage/sealer/mock"
	"github.com/filecoin-project/lotus/storage/sealer/storiface"
	"github.com/filecoin-project/lotus/storage/wdpost"
)

func TestWindowPostNoPreChecks(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	client, miner, ens := kit.EnsembleMinimal(t,
		kit.LatestActorsAt(-1),
		kit.MockProofs(),
		kit.ConstructorOpts(
			node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(
				config.DefaultStorageMiner().Fees,
				config.ProvingConfig{
					DisableWDPoStPreChecks: true,
				},
			))))
	ens.InterconnectAll().BeginMining(2 * time.Millisecond)

	nSectors := 10

	miner.PledgeSectors(ctx, nSectors, 0, nil)

	maddr, err := miner.ActorAddress(ctx)
	require.NoError(t, err)
	di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	mid, err := address.IDFromAddress(maddr)
	require.NoError(t, err)

	t.Log("Running one proving period")
	waitUntil := di.Open + di.WPoStProvingPeriod
	t.Logf("End for head.Height > %d", waitUntil)

	ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
	t.Logf("Now head.Height = %d", ts.Height())

	p, err := client.StateMinerPower(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	ssz, err := miner.ActorSectorSize(ctx, maddr)
	require.NoError(t, err)

	require.Equal(t, p.MinerPower, p.TotalPower)
	require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(nSectors+kit.DefaultPresealsPerBootstrapMiner)))

	t.Log("Drop some sectors")

	// Drop 2 sectors from deadline 2 partition 0 (full partition / deadline)
	{
		parts, err := client.StateMinerPartitions(ctx, maddr, 2, types.EmptyTSK)
		require.NoError(t, err)
		require.Greater(t, len(parts), 0)

		secs := parts[0].AllSectors
		n, err := secs.Count()
		require.NoError(t, err)
		require.Equal(t, uint64(2), n)

		// Drop the partition
		err = secs.ForEach(func(sid uint64) error {
			return miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkCorrupted(storiface.SectorRef{
				ID: abi.SectorID{
					Miner:  abi.ActorID(mid),
					Number: abi.SectorNumber(sid),
				},
			}, true)
		})
		require.NoError(t, err)
	}

	var s storiface.SectorRef

	// Drop 1 sectors from deadline 3 partition 0
	{
		parts, err := client.StateMinerPartitions(ctx, maddr, 3, types.EmptyTSK)
		require.NoError(t, err)
		require.Greater(t, len(parts), 0)

		secs := parts[0].AllSectors
		n, err := secs.Count()
		require.NoError(t, err)
		require.Equal(t, uint64(2), n)

		// Drop the sector
		sn, err := secs.First()
		require.NoError(t, err)

		all, err := secs.All(2)
		require.NoError(t, err)
		t.Log("the sectors", all)

		s = storiface.SectorRef{
			ID: abi.SectorID{
				Miner:  abi.ActorID(mid),
				Number: abi.SectorNumber(sn),
			},
		}

		err = miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(s, true)
		require.NoError(t, err)
	}

	di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	t.Log("Go through another PP, wait for sectors to become faulty")
	waitUntil = di.Open + di.WPoStProvingPeriod
	t.Logf("End for head.Height > %d", waitUntil)

	ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
	t.Logf("Now head.Height = %d", ts.Height())

	p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	require.Equal(t, p.MinerPower, p.TotalPower)

	sectors := p.MinerPower.RawBytePower.Uint64() / uint64(ssz)
	require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-3, int(sectors)) // -3 just removed sectors

	t.Log("Recover one sector")

	err = miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(s, false)
	require.NoError(t, err)

	di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	waitUntil = di.Open + di.WPoStProvingPeriod
	t.Logf("End for head.Height > %d", waitUntil)

	ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
	t.Logf("Now head.Height = %d", ts.Height())

	p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	require.Equal(t, p.MinerPower, p.TotalPower)

	sectors = p.MinerPower.RawBytePower.Uint64() / uint64(ssz)
	require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-2, int(sectors)) // -2 not recovered sectors

	// pledge a sector after recovery

	miner.PledgeSectors(ctx, 1, nSectors, nil)

	{
		// Wait until proven.
		di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
		require.NoError(t, err)

		waitUntil := di.Open + di.WPoStProvingPeriod
		t.Logf("End for head.Height > %d\n", waitUntil)

		ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
		t.Logf("Now head.Height = %d", ts.Height())
	}

	p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	require.Equal(t, p.MinerPower, p.TotalPower)

	sectors = p.MinerPower.RawBytePower.Uint64() / uint64(ssz)
	require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-2+1, int(sectors)) // -2 not recovered sectors + 1 just pledged
}
itests/wdpost_worker_config_test.go (new file, 201 lines)
@@ -0,0 +1,201 @@
package itests

import (
	"context"
	"testing"
	"time"

	logging "github.com/ipfs/go-log/v2"
	"github.com/stretchr/testify/require"

	"github.com/filecoin-project/go-address"
	"github.com/filecoin-project/go-state-types/abi"

	"github.com/filecoin-project/lotus/chain/types"
	"github.com/filecoin-project/lotus/itests/kit"
	"github.com/filecoin-project/lotus/node"
	"github.com/filecoin-project/lotus/node/config"
	"github.com/filecoin-project/lotus/node/impl"
	"github.com/filecoin-project/lotus/node/modules"
	"github.com/filecoin-project/lotus/storage/sealer"
	"github.com/filecoin-project/lotus/storage/sealer/sealtasks"
	"github.com/filecoin-project/lotus/storage/sealer/storiface"
	"github.com/filecoin-project/lotus/storage/wdpost"
)

func TestWindowPostNoBuiltinWindow(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	_ = logging.SetLogLevel("storageminer", "INFO")

	sectors := 2 * 48 * 2

	client, miner, _, ens := kit.EnsembleWorker(t,
		kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines
		kit.LatestActorsAt(-1),
		kit.ConstructorOpts(
			node.Override(new(sealer.Config), func() sealer.Config {
				c := config.DefaultStorageMiner().StorageManager()
				c.DisableBuiltinWindowPoSt = true
				return c
			}),
			node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(
				config.DefaultStorageMiner().Fees,
				config.ProvingConfig{
					DisableWDPoStPreChecks: false,
				},
			))),
		kit.ThroughRPC()) // generic non-post worker

	maddr, err := miner.ActorAddress(ctx)
	require.NoError(t, err)

	di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0]

	di = di.NextNotElapsed()

	t.Log("Running one proving period")
	waitUntil := di.Open + di.WPoStChallengeWindow*2 + wdpost.SubmitConfidence
	client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))

	t.Log("Waiting for post message")
	bm.Stop()

	var lastPending []*types.SignedMessage
	for i := 0; i < 500; i++ {
		lastPending, err = client.MpoolPending(ctx, types.EmptyTSK)
		require.NoError(t, err)

		if len(lastPending) > 0 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// expect no post messages
	require.Equal(t, len(lastPending), 0)
}

func TestWindowPostNoBuiltinWindowWithWorker(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	_ = logging.SetLogLevel("storageminer", "INFO")

	sectors := 2 * 48 * 2

	client, miner, _, ens := kit.EnsembleWorker(t,
		kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines
		kit.LatestActorsAt(-1),
		kit.ConstructorOpts(
			node.Override(new(sealer.Config), func() sealer.Config {
				c := config.DefaultStorageMiner().StorageManager()
				c.DisableBuiltinWindowPoSt = true
				return c
			}),
			node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(
				config.DefaultStorageMiner().Fees,
				config.ProvingConfig{
					DisableBuiltinWindowPoSt:  true,
					DisableBuiltinWinningPoSt: false,
					DisableWDPoStPreChecks:    false,
				},
			))),
		kit.ThroughRPC(),
		kit.WithTaskTypes([]sealtasks.TaskType{sealtasks.TTGenerateWindowPoSt}))

	maddr, err := miner.ActorAddress(ctx)
	require.NoError(t, err)

	di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0]

	di = di.NextNotElapsed()

	t.Log("Running one proving period")
	waitUntil := di.Open + di.WPoStChallengeWindow*2 + wdpost.SubmitConfidence
	client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))

	t.Log("Waiting for post message")
	bm.Stop()

	var lastPending []*types.SignedMessage
	for i := 0; i < 500; i++ {
		lastPending, err = client.MpoolPending(ctx, types.EmptyTSK)
		require.NoError(t, err)

		if len(lastPending) > 0 {
			break
		}
		time.Sleep(40 * time.Millisecond)
	}

	require.Greater(t, len(lastPending), 0)

	t.Log("post message landed")

	bm.MineBlocks(ctx, 2*time.Millisecond)

	waitUntil = di.Open + di.WPoStChallengeWindow*3
	t.Logf("End for head.Height > %d", waitUntil)

	ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
	t.Logf("Now head.Height = %d", ts.Height())

	p, err := client.StateMinerPower(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	ssz, err := miner.ActorSectorSize(ctx, maddr)
	require.NoError(t, err)

	require.Equal(t, p.MinerPower, p.TotalPower)
	require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(sectors)))

	mid, err := address.IDFromAddress(maddr)
	require.NoError(t, err)

	di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	// Remove one sector in the next deadline (so it's skipped)
	{
		parts, err := client.StateMinerPartitions(ctx, maddr, di.Index+1, types.EmptyTSK)
		require.NoError(t, err)
		require.Greater(t, len(parts), 0)

		secs := parts[0].AllSectors
		n, err := secs.Count()
		require.NoError(t, err)
		require.Equal(t, uint64(2), n)

		// Drop the sector
		sid, err := secs.First()
		require.NoError(t, err)

		t.Logf("Drop sector %d; dl %d part %d", sid, di.Index+1, 0)

		err = miner.BaseAPI.(*impl.StorageMinerAPI).IStorageMgr.Remove(ctx, storiface.SectorRef{
			ID: abi.SectorID{
				Miner:  abi.ActorID(mid),
				Number: abi.SectorNumber(sid),
			},
		})
		require.NoError(t, err)
	}

	waitUntil = di.Close + di.WPoStChallengeWindow
	ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
	t.Logf("Now head.Height = %d", ts.Height())

	p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
	require.NoError(t, err)

	require.Equal(t, p.MinerPower, p.TotalPower)
	require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(sectors-1)))
}
@@ -117,7 +117,7 @@ func ConfigStorageMiner(c interface{}) Option {
		Override(new(*miner.Miner), modules.SetupBlockProducer),
		Override(new(gen.WinningPoStProver), storage.NewWinningPoStProver),
		Override(new(*storage.Miner), modules.StorageMiner(cfg.Fees)),
		Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(cfg.Fees)),
		Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(cfg.Fees, cfg.Proving)),
		Override(new(sectorblocks.SectorBuilder), From(new(*storage.Miner))),
	),
@@ -53,7 +53,7 @@ func run() error {
				continue
			}
		case stType:
			if strings.HasPrefix(line, "// ") {
			if strings.HasPrefix(line, "//") {
				cline := strings.TrimSpace(strings.TrimPrefix(line, "//"))
				currentComment = append(currentComment, cline)
				continue
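Editor's note: this one-character change appears to be what lets the regenerated documentation in the next hunks pick up whole multi-paragraph comments — a bare "//" separator line has no trailing space, so the old "// " prefix check did not treat it as part of the comment, and only the text after the last separator survived (which is why the DisableMetadataLog entry previously began at its second paragraph). A small standalone sketch of the difference, reusing lines from that comment; this is an editor's illustration, not code from the PR.

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Lines as they appear in the DisableMetadataLog doc comment, including the
	// bare "//" used as a paragraph separator.
	lines := []string{
		"// When set to true disables metadata log (.lotus/kvlog). This can save disk",
		"// space by reducing metadata redundancy.",
		"//",
		"// Note that in case of metadata corruption it might be much harder to recover",
	}

	for _, line := range lines {
		oldMatch := strings.HasPrefix(line, "// ") // old check: false for the bare "//" separator
		newMatch := strings.HasPrefix(line, "//")  // new check: true; TrimPrefix+TrimSpace yields an empty line
		fmt.Printf("old=%-5v new=%-5v %q\n", oldMatch, newMatch, line)
	}
}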
@@ -34,7 +34,10 @@ var Doc = map[string][]DocField{
		Name: "DisableMetadataLog",
		Type: "bool",

		Comment: `Note that in case of metadata corruption it might be much harder to recover
		Comment: `When set to true disables metadata log (.lotus/kvlog). This can save disk
space by reducing metadata redundancy.

Note that in case of metadata corruption it might be much harder to recover
your node if metadata log is disabled`,
	},
},
@@ -627,7 +630,64 @@ over the worker address if this flag is set.`,
		Name: "ParallelCheckLimit",
		Type: "int",

		Comment: `Maximum number of sector checks to run in parallel. (0 = unlimited)`,
		Comment: `Maximum number of sector checks to run in parallel. (0 = unlimited)

WARNING: Setting this value too high may make the node crash by running out of stack
WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due
to late submission.

After changing this option, confirm that the new value works in your setup by invoking
'lotus-miner proving compute window-post 0'`,
	},
	{
		Name: "DisableBuiltinWindowPoSt",
		Type: "bool",

		Comment: `Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present.

WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need
to be recovered. Before enabling this option, make sure your PoSt workers work correctly.

After changing this option, confirm that the new value works in your setup by invoking
'lotus-miner proving compute window-post 0'`,
	},
	{
		Name: "DisableBuiltinWinningPoSt",
		Type: "bool",

		Comment: `Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present.

WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards.
Before enabling this option, make sure your PoSt workers work correctly.`,
	},
	{
		Name: "DisableWDPoStPreChecks",
		Type: "bool",

		Comment: `Disable WindowPoSt provable sector readability checks.

In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges
from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as
we're only interested in checking that sector data can be read.

When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process
can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by
the builtin logic not skipping snark computation when some sectors need to be skipped.

When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and
if challenges for some sectors aren't readable, those sectors will just get skipped.

Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter
time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should
be negligible.

NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers.

NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is
sent to the chain

After changing this option, confirm that the new value works in your setup by invoking
'lotus-miner proving compute window-post 0'`,
	},
},
"Pubsub": []DocField{
@@ -67,6 +67,8 @@ func (c *StorageMiner) StorageManager() sealer.Config {

		Assigner: c.Storage.Assigner,

		ParallelCheckLimit: c.Proving.ParallelCheckLimit,
		ParallelCheckLimit:        c.Proving.ParallelCheckLimit,
		DisableBuiltinWindowPoSt:  c.Proving.DisableBuiltinWindowPoSt,
		DisableBuiltinWinningPoSt: c.Proving.DisableBuiltinWinningPoSt,
	}
}
@@ -221,9 +221,55 @@ type RetrievalPricingDefault struct {

type ProvingConfig struct {
	// Maximum number of sector checks to run in parallel. (0 = unlimited)
	//
	// WARNING: Setting this value too high may make the node crash by running out of stack
	// WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due
	// to late submission.
	//
	// After changing this option, confirm that the new value works in your setup by invoking
	// 'lotus-miner proving compute window-post 0'
	ParallelCheckLimit int

	// todo disable builtin post
	// Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present.
	//
	// WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need
	// to be recovered. Before enabling this option, make sure your PoSt workers work correctly.
	//
	// After changing this option, confirm that the new value works in your setup by invoking
	// 'lotus-miner proving compute window-post 0'
	DisableBuiltinWindowPoSt bool

	// Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present.
	//
	// WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards.
	// Before enabling this option, make sure your PoSt workers work correctly.
	DisableBuiltinWinningPoSt bool

	// Disable WindowPoSt provable sector readability checks.
	//
	// In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges
	// from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as
	// we're only interested in checking that sector data can be read.
	//
	// When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process
	// can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by
	// the builtin logic not skipping snark computation when some sectors need to be skipped.
	//
	// When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and
	// if challenges for some sectors aren't readable, those sectors will just get skipped.
	//
	// Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter
	// time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should
	// be negligible.
	//
	// NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers.
	//
	// NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is
	// sent to the chain
	//
	// After changing this option, confirm that the new value works in your setup by invoking
	// 'lotus-miner proving compute window-post 0'
	DisableWDPoStPreChecks bool
}

type SealingConfig struct {
@@ -254,7 +254,7 @@ func StorageMiner(fc config.MinerFeeConfig) func(params StorageMinerParams) (*st
	}
}

func WindowPostScheduler(fc config.MinerFeeConfig) func(params StorageMinerParams) (*wdpost.WindowPoStScheduler, error) {
func WindowPostScheduler(fc config.MinerFeeConfig, pc config.ProvingConfig) func(params StorageMinerParams) (*wdpost.WindowPoStScheduler, error) {
	return func(params StorageMinerParams) (*wdpost.WindowPoStScheduler, error) {
		var (
			mctx = params.MetricsCtx
@@ -269,7 +269,7 @@ func WindowPostScheduler(fc config.MinerFeeConfig) func(params StorageMinerParam

		ctx := helpers.LifecycleCtx(mctx, lc)

		fps, err := wdpost.NewWindowedPoStScheduler(api, fc, as, sealer, verif, sealer, j, maddr)
		fps, err := wdpost.NewWindowedPoStScheduler(api, fc, pc, as, sealer, verif, sealer, j, maddr)
		if err != nil {
			return nil, err
		}
@@ -69,8 +69,10 @@ type Manager struct {
	workLk sync.Mutex
	work   *statestore.StateStore

	parallelCheckLimit     int
	disallowRemoteFinalize bool
	parallelCheckLimit        int
	disableBuiltinWindowPoSt  bool
	disableBuiltinWinningPoSt bool
	disallowRemoteFinalize    bool

	callToWork map[storiface.CallID]WorkID
	// used when we get an early return and there's no callToWork mapping
@@ -120,7 +122,9 @@ type Config struct {
	ResourceFiltering ResourceFilteringStrategy

	// PoSt config
	ParallelCheckLimit int
	ParallelCheckLimit        int
	DisableBuiltinWindowPoSt  bool
	DisableBuiltinWinningPoSt bool

	DisallowRemoteFinalize bool

@@ -156,8 +160,10 @@ func New(ctx context.Context, lstor *paths.Local, stor paths.Store, ls paths.Loc

		localProver: prover,

		parallelCheckLimit:     sc.ParallelCheckLimit,
		disallowRemoteFinalize: sc.DisallowRemoteFinalize,
		parallelCheckLimit:        sc.ParallelCheckLimit,
		disableBuiltinWindowPoSt:  sc.DisableBuiltinWindowPoSt,
		disableBuiltinWinningPoSt: sc.DisableBuiltinWinningPoSt,
		disallowRemoteFinalize:    sc.DisallowRemoteFinalize,

		work:       mss,
		callToWork: map[storiface.CallID]WorkID{},
@@ -17,7 +17,9 @@ import (
)

func (m *Manager) GenerateWinningPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.ExtendedSectorInfo, randomness abi.PoStRandomness) ([]proof.PoStProof, error) {
	if !m.winningPoStSched.CanSched(ctx) {
	if !m.disableBuiltinWinningPoSt && !m.winningPoStSched.CanSched(ctx) {
		// if builtin PoSt isn't disabled, and there are no workers, compute the PoSt locally

		log.Info("GenerateWinningPoSt run at lotus-miner")
		return m.localProver.GenerateWinningPoSt(ctx, minerID, sectorInfo, randomness)
	}
@@ -76,7 +78,9 @@ func (m *Manager) generateWinningPoSt(ctx context.Context, minerID abi.ActorID,
}

func (m *Manager) GenerateWindowPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.ExtendedSectorInfo, randomness abi.PoStRandomness) (proof []proof.PoStProof, skipped []abi.SectorID, err error) {
	if !m.windowPoStSched.CanSched(ctx) {
	if !m.disableBuiltinWindowPoSt && !m.windowPoStSched.CanSched(ctx) {
		// if builtin PoSt isn't disabled, and there are no workers, compute the PoSt locally

		log.Info("GenerateWindowPoSt run at lotus-miner")
		return m.localProver.GenerateWindowPoSt(ctx, minerID, sectorInfo, randomness)
	}
@@ -230,11 +234,9 @@ func (m *Manager) generatePartitionWindowPost(ctx context.Context, spt abi.Regis
}

func (m *Manager) GenerateWinningPoStWithVanilla(ctx context.Context, proofType abi.RegisteredPoStProof, minerID abi.ActorID, randomness abi.PoStRandomness, proofs [][]byte) ([]proof.PoStProof, error) {
	//TODO implement me
	panic("implement me")
	panic("worker-level api shouldn't be called at this level")
}

func (m *Manager) GenerateWindowPoStWithVanilla(ctx context.Context, proofType abi.RegisteredPoStProof, minerID abi.ActorID, randomness abi.PoStRandomness, proofs [][]byte, partitionIdx int) (proof.PoStProof, error) {
	//TODO implement me
	panic("implement me")
	panic("worker-level api shouldn't be called at this level")
}
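Read together with the new config flags, the gating in GenerateWinningPoSt and GenerateWindowPoSt above reduces to: the builtin prover on the lotus-miner process runs only when it is not disabled and no PoSt worker can be scheduled. The helper below is an editor's illustration of that decision for the window PoSt case (not lotus code), including the failure mode the configuration warning refers to.

package example

// whereWindowPoStRuns summarizes the gating added in Manager.GenerateWindowPoSt:
// builtin computation is used only when it isn't disabled and no window PoSt
// worker can be scheduled. Editor's sketch, not part of the PR.
func whereWindowPoStRuns(disableBuiltin, workerCanSched bool) string {
	switch {
	case !disableBuiltin && !workerCanSched:
		// mirrors: if !m.disableBuiltinWindowPoSt && !m.windowPoStSched.CanSched(ctx)
		return "computed locally on the lotus-miner process (builtin prover)"
	case workerCanSched:
		return "scheduled on a connected window PoSt worker"
	default:
		// DisableBuiltinWindowPoSt = true with no workers connected: the case the
		// config docs warn about; window PoSt will fail until workers are attached.
		return "not computed: builtin disabled and no window PoSt worker connected"
	}
}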
@@ -241,275 +241,6 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B
	return sbf, nil
}

// declareRecoveries identifies sectors that were previously marked as faulty
// for our miner, but are now recovered (i.e. are now provable again) and
// still not reported as such.
//
// It then reports the recovery on chain via a `DeclareFaultsRecovered`
// message to our miner actor.
//
// This is always invoked ahead of time, before the deadline for the evaluated
// sectors arrives. That way, recoveries are declared in preparation for those
// sectors to be proven.
//
// If a declaration is made, it awaits for build.MessageConfidence confirmations
// on chain before returning.
//
// TODO: the waiting should happen in the background. Right now this
// is blocking/delaying the actual generation and submission of WindowPoSts in
// this deadline!
func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.RecoveryDeclaration, *types.SignedMessage, error) {
	ctx, span := trace.StartSpan(ctx, "storage.declareRecoveries")
	defer span.End()

	faulty := uint64(0)
	params := &miner.DeclareFaultsRecoveredParams{
		Recoveries: []miner.RecoveryDeclaration{},
	}

	for partIdx, partition := range partitions {
		unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors)
		if err != nil {
			return nil, nil, xerrors.Errorf("subtracting recovered set from fault set: %w", err)
		}

		uc, err := unrecovered.Count()
		if err != nil {
			return nil, nil, xerrors.Errorf("counting unrecovered sectors: %w", err)
		}

		if uc == 0 {
			continue
		}

		faulty += uc

		recovered, err := s.checkSectors(ctx, unrecovered, tsk)
		if err != nil {
			return nil, nil, xerrors.Errorf("checking unrecovered sectors: %w", err)
		}

		// if all sectors failed to recover, don't declare recoveries
		recoveredCount, err := recovered.Count()
		if err != nil {
			return nil, nil, xerrors.Errorf("counting recovered sectors: %w", err)
		}

		if recoveredCount == 0 {
			continue
		}

		params.Recoveries = append(params.Recoveries, miner.RecoveryDeclaration{
			Deadline:  dlIdx,
			Partition: uint64(partIdx),
			Sectors:   recovered,
		})
	}

	recoveries := params.Recoveries
	if len(recoveries) == 0 {
		if faulty != 0 {
			log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty)
		}

		return recoveries, nil, nil
	}

	enc, aerr := actors.SerializeParams(params)
	if aerr != nil {
		return recoveries, nil, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr)
	}

	msg := &types.Message{
		To:     s.actor,
		Method: builtin.MethodsMiner.DeclareFaultsRecovered,
		Params: enc,
		Value:  types.NewInt(0),
	}
	spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
	if err := s.prepareMessage(ctx, msg, spec); err != nil {
		return recoveries, nil, err
	}

	sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)})
	if err != nil {
		return recoveries, sm, xerrors.Errorf("pushing message to mpool: %w", err)
	}

	log.Warnw("declare faults recovered Message CID", "cid", sm.Cid())

	rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
	if err != nil {
		return recoveries, sm, xerrors.Errorf("declare faults recovered wait error: %w", err)
	}

	if rec.Receipt.ExitCode != 0 {
		return recoveries, sm, xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode)
	}

	return recoveries, sm, nil
}

// declareFaults identifies the sectors on the specified proving deadline that
// are faulty, and reports the faults on chain via the `DeclareFaults` message
// to our miner actor.
//
// This is always invoked ahead of time, before the deadline for the evaluated
// sectors arrives. That way, faults are declared before a penalty is accrued.
//
// If a declaration is made, it awaits for build.MessageConfidence confirmations
// on chain before returning.
//
// TODO: the waiting should happen in the background. Right now this
// is blocking/delaying the actual generation and submission of WindowPoSts in
// this deadline!
func (s *WindowPoStScheduler) declareFaults(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.FaultDeclaration, *types.SignedMessage, error) {
	ctx, span := trace.StartSpan(ctx, "storage.declareFaults")
	defer span.End()

	bad := uint64(0)
	params := &miner.DeclareFaultsParams{
		Faults: []miner.FaultDeclaration{},
	}

	for partIdx, partition := range partitions {
		nonFaulty, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors)
		if err != nil {
			return nil, nil, xerrors.Errorf("determining non faulty sectors: %w", err)
		}

		good, err := s.checkSectors(ctx, nonFaulty, tsk)
		if err != nil {
			return nil, nil, xerrors.Errorf("checking sectors: %w", err)
		}

		newFaulty, err := bitfield.SubtractBitField(nonFaulty, good)
		if err != nil {
			return nil, nil, xerrors.Errorf("calculating faulty sector set: %w", err)
		}

		c, err := newFaulty.Count()
		if err != nil {
			return nil, nil, xerrors.Errorf("counting faulty sectors: %w", err)
		}

		if c == 0 {
			continue
		}

		bad += c

		params.Faults = append(params.Faults, miner.FaultDeclaration{
			Deadline:  dlIdx,
			Partition: uint64(partIdx),
			Sectors:   newFaulty,
		})
	}

	faults := params.Faults
	if len(faults) == 0 {
		return faults, nil, nil
	}

	log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad)

	enc, aerr := actors.SerializeParams(params)
	if aerr != nil {
		return faults, nil, xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
	}

	msg := &types.Message{
		To:     s.actor,
		Method: builtin.MethodsMiner.DeclareFaults,
		Params: enc,
		Value:  types.NewInt(0), // TODO: Is there a fee?
	}
	spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
	if err := s.prepareMessage(ctx, msg, spec); err != nil {
		return faults, nil, err
	}

	sm, err := s.api.MpoolPushMessage(ctx, msg, spec)
	if err != nil {
		return faults, sm, xerrors.Errorf("pushing message to mpool: %w", err)
	}

	log.Warnw("declare faults Message CID", "cid", sm.Cid())

	rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
	if err != nil {
		return faults, sm, xerrors.Errorf("declare faults wait error: %w", err)
	}

	if rec.Receipt.ExitCode != 0 {
		return faults, sm, xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
	}

	return faults, sm, nil
}

func (s *WindowPoStScheduler) asyncFaultRecover(di dline.Info, ts *types.TipSet) {
	go func() {
		// check faults / recoveries for the *next* deadline. It's already too
		// late to declare them for this deadline
		declDeadline := (di.Index + 2) % di.WPoStPeriodDeadlines

		partitions, err := s.api.StateMinerPartitions(context.TODO(), s.actor, declDeadline, ts.Key())
		if err != nil {
			log.Errorf("getting partitions: %v", err)
			return
		}

		var (
			sigmsg     *types.SignedMessage
			recoveries []miner.RecoveryDeclaration
			faults     []miner.FaultDeclaration

			// optionalCid returns the CID of the message, or cid.Undef is the
			// message is nil. We don't need the argument (could capture the
			// pointer), but it's clearer and purer like that.
			optionalCid = func(sigmsg *types.SignedMessage) cid.Cid {
				if sigmsg == nil {
					return cid.Undef
				}
				return sigmsg.Cid()
			}
		)

		if recoveries, sigmsg, err = s.declareRecoveries(context.TODO(), declDeadline, partitions, ts.Key()); err != nil {
			// TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
			log.Errorf("checking sector recoveries: %v", err)
		}

		s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStRecoveries], func() interface{} {
			j := WdPoStRecoveriesProcessedEvt{
				evtCommon:    s.getEvtCommon(err),
				Declarations: recoveries,
				MessageCID:   optionalCid(sigmsg),
			}
			j.Error = err
			return j
		})

		if ts.Height() > build.UpgradeIgnitionHeight {
			return // FORK: declaring faults after ignition upgrade makes no sense
		}

		if faults, sigmsg, err = s.declareFaults(context.TODO(), declDeadline, partitions, ts.Key()); err != nil {
			// TODO: This is also potentially really bad, but we try to post anyways
			log.Errorf("checking sector faults: %v", err)
		}

		s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStFaults], func() interface{} {
			return WdPoStFaultsProcessedEvt{
				evtCommon:    s.getEvtCommon(err),
				Declarations: faults,
				MessageCID:   optionalCid(sigmsg),
			}
		})
	}()
}

// runPoStCycle runs a full cycle of the PoSt process:
//
// 1. performs recovery declarations for the next deadline.
@@ -604,9 +335,15 @@ func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, manual bool, di
		return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err)
	}

	good, err := s.checkSectors(ctx, toProve, ts.Key())
	good, err := toProve.Copy()
	if err != nil {
		return nil, xerrors.Errorf("checking sectors to skip: %w", err)
		return nil, xerrors.Errorf("copy toProve: %w", err)
	}
	if !s.disablePreChecks {
		good, err = s.checkSectors(ctx, toProve, ts.Key())
		if err != nil {
			return nil, xerrors.Errorf("checking sectors to skip: %w", err)
		}
	}

	good, err = bitfield.SubtractBitField(good, postSkipped)
storage/wdpost/wdpost_run_faults.go (new file, 291 lines)
@@ -0,0 +1,291 @@
package wdpost

import (
	"context"

	"github.com/ipfs/go-cid"
	"go.opencensus.io/trace"
	"golang.org/x/xerrors"

	"github.com/filecoin-project/go-bitfield"
	"github.com/filecoin-project/go-state-types/abi"
	"github.com/filecoin-project/go-state-types/builtin"
	"github.com/filecoin-project/go-state-types/builtin/v8/miner"
	"github.com/filecoin-project/go-state-types/dline"

	"github.com/filecoin-project/lotus/api"
	"github.com/filecoin-project/lotus/build"
	"github.com/filecoin-project/lotus/chain/actors"
	"github.com/filecoin-project/lotus/chain/types"
)

// declareRecoveries identifies sectors that were previously marked as faulty
// for our miner, but are now recovered (i.e. are now provable again) and
// still not reported as such.
//
// It then reports the recovery on chain via a `DeclareFaultsRecovered`
// message to our miner actor.
//
// This is always invoked ahead of time, before the deadline for the evaluated
// sectors arrives. That way, recoveries are declared in preparation for those
// sectors to be proven.
//
// If a declaration is made, it awaits for build.MessageConfidence confirmations
// on chain before returning.
//
// TODO: the waiting should happen in the background. Right now this
// is blocking/delaying the actual generation and submission of WindowPoSts in
// this deadline!
func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.RecoveryDeclaration, *types.SignedMessage, error) {
	ctx, span := trace.StartSpan(ctx, "storage.declareRecoveries")
	defer span.End()

	faulty := uint64(0)
	params := &miner.DeclareFaultsRecoveredParams{
		Recoveries: []miner.RecoveryDeclaration{},
	}

	for partIdx, partition := range partitions {
		unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors)
		if err != nil {
			return nil, nil, xerrors.Errorf("subtracting recovered set from fault set: %w", err)
		}

		uc, err := unrecovered.Count()
		if err != nil {
			return nil, nil, xerrors.Errorf("counting unrecovered sectors: %w", err)
		}

		if uc == 0 {
			continue
		}

		faulty += uc

		recovered, err := s.checkSectors(ctx, unrecovered, tsk)
		if err != nil {
			return nil, nil, xerrors.Errorf("checking unrecovered sectors: %w", err)
		}

		// if all sectors failed to recover, don't declare recoveries
		recoveredCount, err := recovered.Count()
		if err != nil {
			return nil, nil, xerrors.Errorf("counting recovered sectors: %w", err)
		}

		if recoveredCount == 0 {
			continue
		}

		params.Recoveries = append(params.Recoveries, miner.RecoveryDeclaration{
			Deadline:  dlIdx,
			Partition: uint64(partIdx),
			Sectors:   recovered,
		})
	}

	recoveries := params.Recoveries
	if len(recoveries) == 0 {
		if faulty != 0 {
			log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty)
		}

		return recoveries, nil, nil
	}

	enc, aerr := actors.SerializeParams(params)
	if aerr != nil {
		return recoveries, nil, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr)
	}

	msg := &types.Message{
		To:     s.actor,
		Method: builtin.MethodsMiner.DeclareFaultsRecovered,
		Params: enc,
		Value:  types.NewInt(0),
	}
	spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
	if err := s.prepareMessage(ctx, msg, spec); err != nil {
		return recoveries, nil, err
	}

	sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)})
	if err != nil {
		return recoveries, sm, xerrors.Errorf("pushing message to mpool: %w", err)
	}

	log.Warnw("declare faults recovered Message CID", "cid", sm.Cid())

	rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
	if err != nil {
		return recoveries, sm, xerrors.Errorf("declare faults recovered wait error: %w", err)
	}

	if rec.Receipt.ExitCode != 0 {
		return recoveries, sm, xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode)
	}

	return recoveries, sm, nil
}

// declareFaults identifies the sectors on the specified proving deadline that
// are faulty, and reports the faults on chain via the `DeclareFaults` message
// to our miner actor.
//
// NOTE: THIS CODE ISN'T INVOKED AFTER THE IGNITION UPGRADE
//
// This is always invoked ahead of time, before the deadline for the evaluated
// sectors arrives. That way, faults are declared before a penalty is accrued.
//
// If a declaration is made, it awaits for build.MessageConfidence confirmations
// on chain before returning.
//
// TODO: the waiting should happen in the background. Right now this
// is blocking/delaying the actual generation and submission of WindowPoSts in
// this deadline!
func (s *WindowPoStScheduler) declareFaults(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.FaultDeclaration, *types.SignedMessage, error) {
	ctx, span := trace.StartSpan(ctx, "storage.declareFaults")
	defer span.End()

	bad := uint64(0)
	params := &miner.DeclareFaultsParams{
		Faults: []miner.FaultDeclaration{},
	}

	for partIdx, partition := range partitions {
		nonFaulty, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors)
		if err != nil {
			return nil, nil, xerrors.Errorf("determining non faulty sectors: %w", err)
		}

		good, err := s.checkSectors(ctx, nonFaulty, tsk)
		if err != nil {
			return nil, nil, xerrors.Errorf("checking sectors: %w", err)
		}

		newFaulty, err := bitfield.SubtractBitField(nonFaulty, good)
		if err != nil {
			return nil, nil, xerrors.Errorf("calculating faulty sector set: %w", err)
		}

		c, err := newFaulty.Count()
		if err != nil {
			return nil, nil, xerrors.Errorf("counting faulty sectors: %w", err)
		}

		if c == 0 {
			continue
		}

		bad += c

		params.Faults = append(params.Faults, miner.FaultDeclaration{
			Deadline:  dlIdx,
			Partition: uint64(partIdx),
			Sectors:   newFaulty,
		})
	}

	faults := params.Faults
	if len(faults) == 0 {
		return faults, nil, nil
	}

	log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad)

	enc, aerr := actors.SerializeParams(params)
	if aerr != nil {
		return faults, nil, xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
	}

	msg := &types.Message{
		To:     s.actor,
		Method: builtin.MethodsMiner.DeclareFaults,
		Params: enc,
		Value:  types.NewInt(0), // TODO: Is there a fee?
	}
	spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
	if err := s.prepareMessage(ctx, msg, spec); err != nil {
		return faults, nil, err
	}

	sm, err := s.api.MpoolPushMessage(ctx, msg, spec)
	if err != nil {
		return faults, sm, xerrors.Errorf("pushing message to mpool: %w", err)
	}

	log.Warnw("declare faults Message CID", "cid", sm.Cid())

	rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
	if err != nil {
		return faults, sm, xerrors.Errorf("declare faults wait error: %w", err)
	}

	if rec.Receipt.ExitCode != 0 {
		return faults, sm, xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
	}

	return faults, sm, nil
}

func (s *WindowPoStScheduler) asyncFaultRecover(di dline.Info, ts *types.TipSet) {
	go func() {
		// check faults / recoveries for the *next* deadline. It's already too
		// late to declare them for this deadline
		declDeadline := (di.Index + 2) % di.WPoStPeriodDeadlines

		partitions, err := s.api.StateMinerPartitions(context.TODO(), s.actor, declDeadline, ts.Key())
		if err != nil {
			log.Errorf("getting partitions: %v", err)
			return
		}

		var (
			sigmsg     *types.SignedMessage
			recoveries []miner.RecoveryDeclaration
			faults     []miner.FaultDeclaration

			// optionalCid returns the CID of the message, or cid.Undef is the
			// message is nil. We don't need the argument (could capture the
			// pointer), but it's clearer and purer like that.
			optionalCid = func(sigmsg *types.SignedMessage) cid.Cid {
				if sigmsg == nil {
					return cid.Undef
				}
				return sigmsg.Cid()
			}
		)

		if recoveries, sigmsg, err = s.declareRecoveries(context.TODO(), declDeadline, partitions, ts.Key()); err != nil {
			// TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
			log.Errorf("checking sector recoveries: %v", err)
		}

		s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStRecoveries], func() interface{} {
			j := WdPoStRecoveriesProcessedEvt{
				evtCommon:    s.getEvtCommon(err),
				Declarations: recoveries,
				MessageCID:   optionalCid(sigmsg),
			}
			j.Error = err
			return j
		})

		if ts.Height() > build.UpgradeIgnitionHeight {
			return // FORK: declaring faults after ignition upgrade makes no sense
		}

		if faults, sigmsg, err = s.declareFaults(context.TODO(), declDeadline, partitions, ts.Key()); err != nil {
			// TODO: This is also potentially really bad, but we try to post anyways
			log.Errorf("checking sector faults: %v", err)
		}

		s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStFaults], func() interface{} {
			return WdPoStFaultsProcessedEvt{
				evtCommon:    s.getEvtCommon(err),
				Declarations: faults,
				MessageCID:   optionalCid(sigmsg),
			}
		})
	}()
}
@@ -70,6 +70,7 @@ type WindowPoStScheduler struct {
	faultTracker     sealer.FaultTracker
	proofType        abi.RegisteredPoStProof
	partitionSectors uint64
	disablePreChecks bool
	ch               *changeHandler

	actor address.Address
@@ -84,6 +85,7 @@ type WindowPoStScheduler struct {
// NewWindowedPoStScheduler creates a new WindowPoStScheduler scheduler.
func NewWindowedPoStScheduler(api NodeAPI,
	cfg config.MinerFeeConfig,
	pcfg config.ProvingConfig,
	as *ctladdr.AddressSelector,
	sp storiface.ProverPoSt,
	verif storiface.Verifier,
@@ -104,6 +106,7 @@ func NewWindowedPoStScheduler(api NodeAPI,
		faultTracker:     ft,
		proofType:        mi.WindowPoStProofType,
		partitionSectors: mi.WindowPoStPartitionSectors,
		disablePreChecks: pcfg.DisableWDPoStPreChecks,

		actor: actor,
		evtTypes: [...]journal.EventType{