Merge pull request #1873 from filecoin-project/feat/better-post-faults
wdpost: Better fault handling
commit ad4712b593
@@ -134,6 +134,9 @@ jobs:
         type: string
         default: "./..."
         description: Import paths of packages to be tested.
+      winpost-test:
+        type: string
+        default: "0"
       test-suite-name:
         type: string
         default: unit
@@ -171,6 +174,7 @@ jobs:
           environment:
             GOTESTSUM_JUNITFILE: /tmp/test-reports/<< parameters.test-suite-name >>/junit.xml
             GOTESTSUM_FORMAT: << parameters.gotestsum-format >>
+            LOTUS_TEST_WINDOW_POST: << parameters.winpost-test >>
           command: |
             mkdir -p /tmp/test-reports/<< parameters.test-suite-name >>
             gotestsum -- \
@@ -199,6 +203,8 @@ jobs:
 
   test-short:
     <<: *test
+  test-window-post:
+    <<: *test
 
   build-macos:
     description: build darwin lotus binary
@@ -332,10 +338,13 @@ workflows:
     jobs:
       - lint-changes:
          args: "--new-from-rev origin/next"
-      - test:
-          codecov-upload: true
       - mod-tidy-check
       - gofmt
+      - test:
+          codecov-upload: true
+      - test-window-post:
+          go-test-flags: "-run=TestWindowedPost"
+          winpost-test: "1"
       - test-short:
          go-test-flags: "--timeout 10m --short"
          filters:
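Illustration, not part of the commit: the new test-window-post job is opt-in. The winpost-test parameter above is exported to the test process as LOTUS_TEST_WINDOW_POST, and the windowed PoSt test only runs when that variable is "1" (the real gate is added to node_test.go further down in this diff). A minimal standalone sketch of that pattern, with a hypothetical test name:

package example

import (
    "os"
    "testing"
)

// TestWindowedPostGate is a hypothetical example of the env-var gate wired up
// by the CI changes above: CircleCI sets LOTUS_TEST_WINDOW_POST=1 through the
// winpost-test parameter; everywhere else the slow test is skipped so the
// default test job stays fast.
func TestWindowedPostGate(t *testing.T) {
    if os.Getenv("LOTUS_TEST_WINDOW_POST") != "1" {
        t.Skip("set LOTUS_TEST_WINDOW_POST=1 to run the windowed PoSt test")
    }
    // The real test body is TestWindowPost in api/test/window_post.go,
    // invoked via TestWindowedPost in the node test diff below.
}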
@@ -8,11 +8,10 @@ import (
 
     "github.com/filecoin-project/go-address"
     "github.com/filecoin-project/go-fil-markets/storagemarket"
+    "github.com/filecoin-project/lotus/chain/types"
     "github.com/filecoin-project/sector-storage/stores"
     "github.com/filecoin-project/sector-storage/storiface"
     "github.com/filecoin-project/specs-actors/actors/abi"
-
-    "github.com/filecoin-project/lotus/chain/types"
 )
 
 // StorageMiner is a low-level interface to the Filecoin network storage miner node
@@ -20,6 +20,7 @@ type TestStorageNode struct {
 }
 
 var PresealGenesis = -1
+const GenesisPreseals = 2
 
 type StorageMiner struct {
     Full int
api/test/window_post.go (new file, 169 lines):

package test

import (
    "context"
    "fmt"
    "github.com/filecoin-project/lotus/api"
    "os"
    "strings"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/filecoin-project/specs-actors/actors/abi"
    miner2 "github.com/filecoin-project/specs-actors/actors/builtin/miner"
    sealing "github.com/filecoin-project/storage-fsm"

    "github.com/filecoin-project/lotus/chain/types"
    "github.com/filecoin-project/lotus/node/impl"
)

func TestPledgeSector(t *testing.T, b APIBuilder, blocktime time.Duration, nSectors int) {
    os.Setenv("BELLMAN_NO_GPU", "1")

    ctx := context.Background()
    n, sn := b(t, 1, oneMiner)
    client := n[0].FullNode.(*impl.FullNodeAPI)
    miner := sn[0]

    addrinfo, err := client.NetAddrsListen(ctx)
    if err != nil {
        t.Fatal(err)
    }

    if err := miner.NetConnect(ctx, addrinfo); err != nil {
        t.Fatal(err)
    }
    time.Sleep(time.Second)

    mine := true
    done := make(chan struct{})
    go func() {
        defer close(done)
        for mine {
            time.Sleep(blocktime)
            if err := sn[0].MineOne(ctx, func(bool) {}); err != nil {
                t.Error(err)
            }
        }
    }()

    pledgeSectors(t, ctx, miner, nSectors)

    mine = false
    <-done
}

func pledgeSectors(t *testing.T, ctx context.Context, miner TestStorageNode, n int) {
    for i := 0; i < n; i++ {
        err := miner.PledgeSector(ctx)
        require.NoError(t, err)
    }

    for {
        s, err := miner.SectorsList(ctx) // Note - the test builder doesn't import genesis sectors into FSM
        require.NoError(t, err)
        fmt.Printf("Sectors: %d\n", len(s))
        if len(s) >= n {
            break
        }

        time.Sleep(100 * time.Millisecond)
    }

    fmt.Printf("All sectors is fsm\n")

    s, err := miner.SectorsList(ctx)
    require.NoError(t, err)

    toCheck := map[abi.SectorNumber]struct{}{}
    for _, number := range s {
        toCheck[number] = struct{}{}
    }

    for len(toCheck) > 0 {
        for n := range toCheck {
            st, err := miner.SectorsStatus(ctx, n)
            require.NoError(t, err)
            if st.State == api.SectorState(sealing.Proving) {
                delete(toCheck, n)
            }
            if strings.Contains(string(st.State), "Fail") {
                t.Fatal("sector in a failed state", st.State)
            }
        }

        time.Sleep(100 * time.Millisecond)
        fmt.Printf("WaitSeal: %d\n", len(s))
    }
}

func TestWindowPost(t *testing.T, b APIBuilder, blocktime time.Duration, nSectors int) {
    os.Setenv("BELLMAN_NO_GPU", "1")

    ctx := context.Background()
    n, sn := b(t, 1, oneMiner)
    client := n[0].FullNode.(*impl.FullNodeAPI)
    miner := sn[0]

    addrinfo, err := client.NetAddrsListen(ctx)
    if err != nil {
        t.Fatal(err)
    }

    if err := miner.NetConnect(ctx, addrinfo); err != nil {
        t.Fatal(err)
    }
    time.Sleep(time.Second)

    mine := true
    done := make(chan struct{})
    go func() {
        defer close(done)
        for mine {
            time.Sleep(blocktime)
            if err := sn[0].MineOne(ctx, func(bool) {}); err != nil {
                t.Error(err)
            }
        }
    }()

    pledgeSectors(t, ctx, miner, nSectors)

    maddr, err := miner.ActorAddress(ctx)
    require.NoError(t, err)

    di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
    require.NoError(t, err)

    fmt.Printf("Running one proving periods\n")

    for {
        head, err := client.ChainHead(ctx)
        require.NoError(t, err)

        if head.Height() > di.PeriodStart + (miner2.WPoStProvingPeriod) + 2 {
            break
        }

        if head.Height() % 100 == 0 {
            fmt.Printf("@%d\n", head.Height())
        }
        time.Sleep(blocktime)
    }

    p, err := client.StateMinerPower(ctx, maddr, types.EmptyTSK)
    require.NoError(t, err)

    ssz, err := miner.ActorSectorSize(ctx, maddr)
    require.NoError(t, err)

    require.Equal(t, p.MinerPower, p.TotalPower)
    require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz) * uint64(nSectors + GenesisPreseals)))

    // TODO: Inject faults here

    mine = false
    <-done
}
@@ -6,6 +6,7 @@ import (
     "crypto/rand"
     "io/ioutil"
     "net/http/httptest"
+    "os"
     "testing"
     "time"
 
@@ -83,7 +84,7 @@ func testStorageNode(ctx context.Context, t *testing.T, waddr address.Address, a
     require.NoError(t, err)
 
     nic := storedcounter.New(ds, datastore.NewKey(modules.StorageCounterDSPrefix))
-    for i := 0; i < nGenesisPreseals; i++ {
+    for i := 0; i < test.GenesisPreseals; i++ {
         _, err := nic.Next()
         require.NoError(t, err)
     }
@@ -188,7 +189,7 @@ func builder(t *testing.T, nFull int, storage []test.StorageMiner) ([]test.TestN
     if err != nil {
         t.Fatal(err)
     }
-    genm, k, err := seed.PreSeal(maddr, abi.RegisteredProof_StackedDRG2KiBPoSt, 0, nGenesisPreseals, tdir, []byte("make genesis mem random"), nil)
+    genm, k, err := seed.PreSeal(maddr, abi.RegisteredProof_StackedDRG2KiBPoSt, 0, test.GenesisPreseals, tdir, []byte("make genesis mem random"), nil)
     if err != nil {
         t.Fatal(err)
     }
@@ -282,8 +283,6 @@ func builder(t *testing.T, nFull int, storage []test.StorageMiner) ([]test.TestN
     return fulls, storers
 }
 
-const nGenesisPreseals = 2
-
 func mockSbBuilder(t *testing.T, nFull int, storage []test.StorageMiner) ([]test.TestNode, []test.TestStorageNode) {
     ctx := context.Background()
     mn := mocknet.New(ctx)
@@ -314,7 +313,7 @@ func mockSbBuilder(t *testing.T, nFull int, storage []test.StorageMiner) ([]test
 
         preseals := storage[i].Preseal
         if preseals == test.PresealGenesis {
-            preseals = nGenesisPreseals
+            preseals = test.GenesisPreseals
         }
 
         genm, k, err := mockstorage.PreSeal(2048, maddr, preseals)
@@ -350,7 +349,7 @@ func mockSbBuilder(t *testing.T, nFull int, storage []test.StorageMiner) ([]test
     templ := &genesis.Template{
         Accounts:  genaccs,
         Miners:    genms,
-        Timestamp: uint64(time.Now().Unix() - 10000),
+        Timestamp: uint64(time.Now().Unix() - (build.BlockDelay * 20000)),
     }
 
     // END PRESEAL SECTION
@@ -493,3 +492,41 @@ func TestDealMining(t *testing.T) {
 
     test.TestDealMining(t, mockSbBuilder, 50*time.Millisecond, false)
 }
+
+func TestPledgeSectors(t *testing.T) {
+    logging.SetLogLevel("miner", "ERROR")
+    logging.SetLogLevel("chainstore", "ERROR")
+    logging.SetLogLevel("chain", "ERROR")
+    logging.SetLogLevel("sub", "ERROR")
+    logging.SetLogLevel("storageminer", "ERROR")
+
+    t.Run("1", func(t *testing.T) {
+        test.TestPledgeSector(t, mockSbBuilder, 50*time.Millisecond, 1)
+    })
+
+    t.Run("100", func(t *testing.T) {
+        test.TestPledgeSector(t, mockSbBuilder, 50*time.Millisecond, 100)
+    })
+
+    t.Run("1000", func(t *testing.T) {
+        if testing.Short() { // takes ~16s
+            t.Skip("skipping test in short mode")
+        }
+
+        test.TestPledgeSector(t, mockSbBuilder, 50*time.Millisecond, 1000)
+    })
+}
+
+func TestWindowedPost(t *testing.T) {
+    if os.Getenv("LOTUS_TEST_WINDOW_POST") != "1" {
+        t.Skip("this takes a few minutes, set LOTUS_TEST_WINDOW_POST=1 to run")
+    }
+
+    logging.SetLogLevel("miner", "ERROR")
+    logging.SetLogLevel("chainstore", "ERROR")
+    logging.SetLogLevel("chain", "ERROR")
+    logging.SetLogLevel("sub", "ERROR")
+    logging.SetLogLevel("storageminer", "ERROR")
+
+    test.TestWindowPost(t, mockSbBuilder, 5*time.Millisecond, 10)
+}
@@ -62,7 +62,56 @@ func (s *WindowPoStScheduler) doPost(ctx context.Context, deadline *miner.Deadli
     }()
 }
 
-func (s *WindowPoStScheduler) checkRecoveries(ctx context.Context, deadline uint64, ts *types.TipSet) error {
+func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check *abi.BitField) (*abi.BitField, error) {
+    spt, err := s.proofType.RegisteredSealProof()
+    if err != nil {
+        return nil, xerrors.Errorf("getting seal proof type: %w", err)
+    }
+
+    mid, err := address.IDFromAddress(s.actor)
+    if err != nil {
+        return nil, err
+    }
+
+    sectors := make(map[abi.SectorID]struct{})
+    var tocheck []abi.SectorID
+    err = check.ForEach(func(snum uint64) error {
+        s := abi.SectorID{
+            Miner:  abi.ActorID(mid),
+            Number: abi.SectorNumber(snum),
+        }
+
+        tocheck = append(tocheck, s)
+        sectors[s] = struct{}{}
+        return nil
+    })
+    if err != nil {
+        return nil, xerrors.Errorf("iterating over bitfield: %w", err)
+    }
+
+    bad, err := s.faultTracker.CheckProvable(ctx, spt, tocheck)
+    if err != nil {
+        return nil, xerrors.Errorf("checking provable sectors: %w", err)
+    }
+    for _, id := range bad {
+        delete(sectors, id)
+    }
+
+    log.Warnw("Checked sectors", "checked", len(tocheck), "good", len(sectors))
+
+    if len(sectors) == 0 { // nothing to recover
+        return nil, nil
+    }
+
+    sbf := bitfield.New()
+    for s := range sectors {
+        (&sbf).Set(uint64(s.Number))
+    }
+
+    return &sbf, nil
+}
+
+func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, deadline uint64, ts *types.TipSet) error {
     faults, err := s.api.StateMinerFaults(ctx, s.actor, ts.Key())
     if err != nil {
         return xerrors.Errorf("getting on-chain faults: %w", err)
@@ -96,53 +145,13 @@ func (s *WindowPoStScheduler) checkRecoveries(ctx context.Context, deadline uint
         return nil
     }
 
-    spt, err := s.proofType.RegisteredSealProof()
+    sbf, err := s.checkSectors(ctx, unrecovered)
     if err != nil {
-        return xerrors.Errorf("getting seal proof type: %w", err)
-    }
-
-    mid, err := address.IDFromAddress(s.actor)
-    if err != nil {
-        return err
-    }
-
-    sectors := make(map[abi.SectorID]struct{})
-    var tocheck []abi.SectorID
-    err = unrecovered.ForEach(func(snum uint64) error {
-        s := abi.SectorID{
-            Miner:  abi.ActorID(mid),
-            Number: abi.SectorNumber(snum),
-        }
-
-        tocheck = append(tocheck, s)
-        sectors[s] = struct{}{}
-        return nil
-    })
-    if err != nil {
-        return xerrors.Errorf("iterating over unrecovered bitfield: %w", err)
-    }
-
-    bad, err := s.faultTracker.CheckProvable(ctx, spt, tocheck)
-    if err != nil {
-        return xerrors.Errorf("checking provable sectors: %w", err)
-    }
-    for _, id := range bad {
-        delete(sectors, id)
-    }
-
-    log.Warnw("Recoverable sectors", "faulty", len(tocheck), "recoverable", len(sectors))
-
-    if len(sectors) == 0 { // nothing to recover
-        return nil
-    }
-
-    sbf := bitfield.New()
-    for s := range sectors {
-        (&sbf).Set(uint64(s.Number))
+        return xerrors.Errorf("checking unrecovered sectors: %w", err)
     }
 
     params := &miner.DeclareFaultsRecoveredParams{
-        Recoveries: []miner.RecoveryDeclaration{{Deadline: deadline, Sectors: &sbf}},
+        Recoveries: []miner.RecoveryDeclaration{{Deadline: deadline, Sectors: sbf}},
     }
 
     enc, aerr := actors.SerializeParams(params)
@@ -179,34 +188,93 @@ func (s *WindowPoStScheduler) checkRecoveries(ctx context.Context, deadline uint
     return nil
 }
 
-func (s *WindowPoStScheduler) checkFaults(ctx context.Context, ssi []abi.SectorNumber) ([]abi.SectorNumber, error) {
-    //faults := s.prover.Scrub(ssi)
-    log.Warnf("Stub checkFaults")
+func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, deadline uint64, deadlineSectors *abi.BitField, ts *types.TipSet) error {
+    dc, err := deadlineSectors.Count()
+    if err != nil {
+        return xerrors.Errorf("counting deadline sectors: %w", err)
+    }
+    if dc == 0 {
+        // nothing can become faulty
+        return nil
+    }
 
-    /*declaredFaults := map[abi.SectorNumber]struct{}{}
+    toCheck, err := s.getSectorsToProve(ctx, deadlineSectors, true, ts)
+    if err != nil {
+        return xerrors.Errorf("getting next sectors to prove: %w", err)
+    }
 
-    {
-        chainFaults, err := s.api.StateMinerFaults(ctx, s.actor, types.EmptyTSK)
-        if err != nil {
-            return nil, xerrors.Errorf("checking on-chain faults: %w", err)
-        }
+    good, err := s.checkSectors(ctx, deadlineSectors)
+    if err != nil {
+        return xerrors.Errorf("checking sectors: %w", err)
+    }
 
-        for _, fault := range chainFaults {
-            declaredFaults[fault] = struct{}{}
-        }
-    }*/
+    faulty, err := bitfield.SubtractBitField(toCheck, good)
+    if err != nil {
+        return xerrors.Errorf("calculating faulty sector set: %w", err)
+    }
 
-    return nil, nil
+    c, err := faulty.Count()
+    if err != nil {
+        return xerrors.Errorf("counting faulty sectors: %w", err)
+    }
+
+    if c == 0 {
+        return nil
+    }
+
+    log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", c)
+
+    params := &miner.DeclareFaultsParams{
+        Faults: []miner.FaultDeclaration{
+            {
+                Deadline: deadline,
+                Sectors:  faulty,
+            },
+        },
+    }
+
+    enc, aerr := actors.SerializeParams(params)
+    if aerr != nil {
+        return xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
+    }
+
+    msg := &types.Message{
+        To:       s.actor,
+        From:     s.worker,
+        Method:   builtin.MethodsMiner.DeclareFaults,
+        Params:   enc,
+        Value:    types.NewInt(0), // TODO: Is there a fee?
+        GasLimit: 10000000,        // i dont know help
+        GasPrice: types.NewInt(2),
+    }
+
+    sm, err := s.api.MpoolPushMessage(ctx, msg)
+    if err != nil {
+        return xerrors.Errorf("pushing message to mpool: %w", err)
+    }
+
+    log.Warnw("declare faults Message CID", "cid", sm.Cid())
+
+    rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid())
+    if err != nil {
+        return xerrors.Errorf("declare faults wait error: %w", err)
+    }
+
+    if rec.Receipt.ExitCode != 0 {
+        return xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
+    }
+
+    return nil
 }
 
 // the input sectors must match with the miner actor
-func (s *WindowPoStScheduler) getNeedProveSectors(ctx context.Context, deadlineSectors *abi.BitField, ts *types.TipSet) (*abi.BitField, error) {
-    faults, err := s.api.StateMinerFaults(ctx, s.actor, ts.Key())
+func (s *WindowPoStScheduler) getSectorsToProve(ctx context.Context, deadlineSectors *abi.BitField, ignoreRecoveries bool, ts *types.TipSet) (*abi.BitField, error) {
+    stateFaults, err := s.api.StateMinerFaults(ctx, s.actor, ts.Key())
     if err != nil {
         return nil, xerrors.Errorf("getting on-chain faults: %w", err)
     }
 
-    declaredFaults, err := bitfield.IntersectBitField(deadlineSectors, faults)
+    faults, err := bitfield.IntersectBitField(deadlineSectors, stateFaults)
     if err != nil {
         return nil, xerrors.Errorf("failed to intersect proof sectors with faults: %w", err)
     }
@@ -216,17 +284,19 @@ func (s *WindowPoStScheduler) getNeedProveSectors(ctx context.Context, deadlineS
         return nil, xerrors.Errorf("getting on-chain recoveries: %w", err)
     }
 
-    expectedRecoveries, err := bitfield.IntersectBitField(declaredFaults, recoveries)
-    if err != nil {
-        return nil, xerrors.Errorf("failed to intersect recoveries with faults: %w", err)
-    }
+    if !ignoreRecoveries {
+        expectedRecoveries, err := bitfield.IntersectBitField(faults, recoveries)
+        if err != nil {
+            return nil, xerrors.Errorf("failed to intersect recoveries with faults: %w", err)
+        }
 
-    expectedFaults, err := bitfield.SubtractBitField(declaredFaults, expectedRecoveries)
-    if err != nil {
-        return nil, xerrors.Errorf("failed to subtract recoveries from faults: %w", err)
-    }
+        faults, err = bitfield.SubtractBitField(faults, expectedRecoveries)
+        if err != nil {
+            return nil, xerrors.Errorf("failed to subtract recoveries from faults: %w", err)
+        }
+    }
 
-    nonFaults, err := bitfield.SubtractBitField(deadlineSectors, expectedFaults)
+    nonFaults, err := bitfield.SubtractBitField(deadlineSectors, faults)
     if err != nil {
         return nil, xerrors.Errorf("failed to diff bitfields: %w", err)
     }
@@ -246,11 +316,26 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
     ctx, span := trace.StartSpan(ctx, "storage.runPost")
     defer span.End()
 
-    // check recoveries for the *next* deadline. It's already too late to
-    // declare them for this deadline
-    if err := s.checkRecoveries(ctx, (di.Index+1)%miner.WPoStPeriodDeadlines, ts); err != nil {
-        // TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
-        log.Errorf("checking sector recoveries: %v", err)
-    }
+    deadlines, err := s.api.StateMinerDeadlines(ctx, s.actor, ts.Key())
+    if err != nil {
+        return nil, xerrors.Errorf("getting miner deadlines: %w", err)
+    }
+
+    {
+        // check faults / recoveries for the *next* deadline. It's already too
+        // late to declare them for this deadline
+        declDeadline := (di.Index + 1) % miner.WPoStPeriodDeadlines
+
+        if err := s.checkNextRecoveries(ctx, declDeadline, ts); err != nil {
+            // TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
+            log.Errorf("checking sector recoveries: %v", err)
+        }
+
+        if err := s.checkNextFaults(ctx, declDeadline, deadlines.Due[declDeadline], ts); err != nil {
+            // TODO: This is also potentially really bad, but we try to post anyways
+            log.Errorf("checking sector faults: %v", err)
+        }
+
+    }
 
     buf := new(bytes.Buffer)
@@ -262,11 +347,6 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
         return nil, xerrors.Errorf("failed to get chain randomness for windowPost (ts=%d; deadline=%d): %w", ts.Height(), di, err)
     }
 
-    deadlines, err := s.api.StateMinerDeadlines(ctx, s.actor, ts.Key())
-    if err != nil {
-        return nil, err
-    }
-
     firstPartition, _, err := miner.PartitionsForDeadline(deadlines, s.partitionSectors, di.Index)
     if err != nil {
         return nil, xerrors.Errorf("getting partitions for deadline: %w", err)
@@ -297,11 +377,29 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
         partitions[i] = firstPartition + uint64(i)
     }
 
-    nps, err := s.getNeedProveSectors(ctx, deadlines.Due[di.Index], ts)
+    nps, err := s.getSectorsToProve(ctx, deadlines.Due[di.Index], false, ts)
     if err != nil {
         return nil, xerrors.Errorf("get need prove sectors: %w", err)
     }
 
+    var skipped *abi.BitField
+    {
+        good, err := s.checkSectors(ctx, nps)
+        if err != nil {
+            return nil, xerrors.Errorf("checking sectors to skip: %w", err)
+        }
+
+        skipped, err = bitfield.SubtractBitField(nps, good)
+        if err != nil {
+            return nil, xerrors.Errorf("nps - good: %w", err)
+        }
+    }
+
+    skipCount, err := skipped.Count()
+    if err != nil {
+        return nil, xerrors.Errorf("getting skipped sector count: %w", err)
+    }
+
     ssi, err := s.sortedSectorInfo(ctx, nps, ts)
     if err != nil {
         return nil, xerrors.Errorf("getting sorted sector info: %w", err)
@@ -315,23 +413,18 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
     log.Infow("running windowPost",
         "chain-random", rand,
         "deadline", di,
-        "height", ts.Height())
+        "height", ts.Height(),
+        "skipped", skipCount)
 
     var snums []abi.SectorNumber
     for _, si := range ssi {
         snums = append(snums, si.SectorNumber)
     }
 
-    faults, err := s.checkFaults(ctx, snums)
-    if err != nil {
-        log.Errorf("Failed to declare faults: %+v", err)
-    }
-
     tsStart := time.Now()
 
     log.Infow("generating windowPost",
-        "sectors", len(ssi),
-        "faults", len(faults))
+        "sectors", len(ssi))
 
     mid, err := address.IDFromAddress(s.actor)
     if err != nil {
@@ -355,7 +448,7 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
         Deadline:   di.Index,
         Partitions: partitions,
         Proofs:     postOut,
-        Skipped:    *abi.NewBitField(), // TODO: Faults here?
+        Skipped:    *skipped,
     }, nil
 }
 
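Illustration, not part of the commit: the fault handling above reduces to bitfield arithmetic — intersect the deadline's sectors with on-chain faults, ask the fault tracker which sectors are still provable, and subtract. A small self-contained sketch of that subtraction, assuming the go-bitfield API as exercised in the hunks above (New/Set/SubtractBitField/Count); the sector numbers are made up:

package main

import (
    "fmt"

    "github.com/filecoin-project/go-bitfield"
)

func main() {
    // Sectors the miner is expected to prove in the upcoming deadline
    // (hypothetical sector numbers).
    toCheck := bitfield.New()
    for _, n := range []uint64{1, 2, 3, 4, 5} {
        (&toCheck).Set(n)
    }

    // Sectors the fault tracker reports as provable.
    good := bitfield.New()
    for _, n := range []uint64{1, 2, 4} {
        (&good).Set(n)
    }

    // Whatever cannot be proven gets declared faulty ahead of the deadline,
    // or ends up in the Skipped field of the PoSt submission.
    faulty, err := bitfield.SubtractBitField(&toCheck, &good)
    if err != nil {
        panic(err)
    }

    c, _ := faulty.Count()
    fmt.Println("faulty sectors:", c) // 2
}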