lpseal: clean up poller loop

This commit is contained in:
Łukasz Magiera 2024-01-17 12:42:43 +01:00
parent af79813af8
commit fabd413e5e

View File

@ -2,6 +2,7 @@ package lpseal
import ( import (
"context" "context"
"github.com/filecoin-project/lotus/chain/actors/policy"
"time" "time"
logging "github.com/ipfs/go-log/v2" logging "github.com/ipfs/go-log/v2"
@ -11,7 +12,6 @@ import (
"github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/lotus/chain/actors/builtin/miner" "github.com/filecoin-project/lotus/chain/actors/builtin/miner"
"github.com/filecoin-project/lotus/chain/actors/policy"
"github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/lib/harmony/harmonydb" "github.com/filecoin-project/lotus/lib/harmony/harmonydb"
"github.com/filecoin-project/lotus/lib/harmony/harmonytask" "github.com/filecoin-project/lotus/lib/harmony/harmonytask"
@ -69,41 +69,43 @@ func (s *SealPoller) RunPoller(ctx context.Context) {
} }
} }
type pollTask struct {
SpID int64 `db:"sp_id"`
SectorNumber int64 `db:"sector_number"`
TaskSDR *int64 `db:"task_id_sdr"`
AfterSDR bool `db:"after_sdr"`
TaskTreeD *int64 `db:"task_id_tree_d"`
AfterTreeD bool `db:"after_tree_d"`
TaskTreeC *int64 `db:"task_id_tree_c"`
AfterTreeC bool `db:"after_tree_c"`
TaskTreeR *int64 `db:"task_id_tree_r"`
AfterTreeR bool `db:"after_tree_r"`
TaskPrecommitMsg *int64 `db:"task_id_precommit_msg"`
AfterPrecommitMsg bool `db:"after_precommit_msg"`
AfterPrecommitMsgSuccess bool `db:"after_precommit_msg_success"`
SeedEpoch *int64 `db:"seed_epoch"`
TaskPoRep *int64 `db:"task_id_porep"`
PoRepProof []byte `db:"porep_proof"`
AfterPoRep bool `db:"after_porep"`
TaskCommitMsg *int64 `db:"task_id_commit_msg"`
AfterCommitMsg bool `db:"after_commit_msg"`
AfterCommitMsgSuccess bool `db:"after_commit_msg_success"`
Failed bool `db:"failed"`
FailedReason string `db:"failed_reason"`
}
func (s *SealPoller) poll(ctx context.Context) error { func (s *SealPoller) poll(ctx context.Context) error {
var tasks []struct { var tasks []pollTask
SpID int64 `db:"sp_id"`
SectorNumber int64 `db:"sector_number"`
TaskSDR *int64 `db:"task_id_sdr"`
AfterSDR bool `db:"after_sdr"`
TaskTreeD *int64 `db:"task_id_tree_d"`
AfterTreeD bool `db:"after_tree_d"`
TaskTreeC *int64 `db:"task_id_tree_c"`
AfterTreeC bool `db:"after_tree_c"`
TaskTreeR *int64 `db:"task_id_tree_r"`
AfterTreeR bool `db:"after_tree_r"`
TaskPrecommitMsg *int64 `db:"task_id_precommit_msg"`
AfterPrecommitMsg bool `db:"after_precommit_msg"`
AfterPrecommitMsgSuccess bool `db:"after_precommit_msg_success"`
SeedEpoch *int64 `db:"seed_epoch"`
TaskPoRep *int64 `db:"task_id_porep"`
PoRepProof []byte `db:"porep_proof"`
AfterPoRep bool `db:"after_porep"`
TaskCommitMsg *int64 `db:"task_id_commit_msg"`
AfterCommitMsg bool `db:"after_commit_msg"`
AfterCommitMsgSuccess bool `db:"after_commit_msg_success"`
Failed bool `db:"failed"`
FailedReason string `db:"failed_reason"`
}
err := s.db.Select(ctx, &tasks, `SELECT err := s.db.Select(ctx, &tasks, `SELECT
sp_id, sector_number, sp_id, sector_number,
@ -128,167 +130,194 @@ func (s *SealPoller) poll(ctx context.Context) error {
continue continue
} }
if task.TaskSDR == nil && s.pollers[pollerSDR].IsSet() {
s.pollers[pollerSDR].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_sdr = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_sdr is null`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
if task.TaskTreeD == nil && task.TaskTreeC == nil && task.TaskTreeR == nil && s.pollers[pollerTrees].IsSet() && task.AfterSDR {
s.pollers[pollerTrees].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_tree_d = $1, task_id_tree_c = $1, task_id_tree_r = $1
WHERE sp_id = $2 AND sector_number = $3 and after_sdr = true and task_id_tree_d is null and task_id_tree_c is null and task_id_tree_r is null`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
if task.TaskPrecommitMsg == nil && task.AfterTreeR && task.AfterTreeD {
s.pollers[pollerPrecommitMsg].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_precommit_msg = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_precommit_msg is null and after_tree_r = true and after_tree_d = true`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
if task.TaskPrecommitMsg != nil && !task.AfterPrecommitMsgSuccess {
var execResult []struct {
ExecutedTskCID string `db:"executed_tsk_cid"`
ExecutedTskEpoch int64 `db:"executed_tsk_epoch"`
ExecutedMsgCID string `db:"executed_msg_cid"`
ExecutedRcptExitCode int64 `db:"executed_rcpt_exitcode"`
ExecutedRcptGasUsed int64 `db:"executed_rcpt_gas_used"`
}
err := s.db.Select(ctx, &execResult, `SELECT executed_tsk_cid, executed_tsk_epoch, executed_msg_cid, executed_rcpt_exitcode, executed_rcpt_gas_used
FROM sectors_sdr_pipeline
JOIN message_waits ON sectors_sdr_pipeline.precommit_msg_cid = message_waits.signed_message_cid
WHERE sp_id = $1 AND sector_number = $2 AND executed_tsk_epoch is not null`, task.SpID, task.SectorNumber)
if err != nil {
log.Errorw("failed to query message_waits", "error", err)
}
if len(execResult) > 0 {
maddr, err := address.NewIDAddress(uint64(task.SpID))
if err != nil {
return err
}
pci, err := s.api.StateSectorPreCommitInfo(ctx, maddr, abi.SectorNumber(task.SectorNumber), types.EmptyTSK)
if err != nil {
return xerrors.Errorf("get precommit info: %w", err)
}
if pci != nil {
randHeight := pci.PreCommitEpoch + policy.GetPreCommitChallengeDelay()
_, err := s.db.Exec(ctx, `UPDATE sectors_sdr_pipeline SET
seed_epoch = $1, precommit_msg_tsk = $2, after_precommit_msg_success = true
WHERE sp_id = $3 AND sector_number = $4 and seed_epoch is NULL`,
randHeight, execResult[0].ExecutedTskCID, task.SpID, task.SectorNumber)
if err != nil {
return xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
} // todo handle missing precommit info (eg expired precommit)
}
}
ts, err := s.api.ChainHead(ctx) ts, err := s.api.ChainHead(ctx)
if err != nil { if err != nil {
return xerrors.Errorf("getting chain head: %w", err) return xerrors.Errorf("getting chain head: %w", err)
} }
if s.pollers[pollerPoRep].IsSet() && task.AfterPrecommitMsgSuccess && task.SeedEpoch != nil && task.TaskPoRep == nil && ts.Height() >= abi.ChainEpoch(*task.SeedEpoch+seedEpochConfidence) { s.pollStartSDR(ctx, task)
s.pollers[pollerPoRep].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) { s.pollStartSDRTrees(ctx, task)
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_porep = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_porep is null`, id, task.SpID, task.SectorNumber) s.pollStartPrecommitMsg(ctx, task)
if err != nil { s.mustPoll(s.pollPrecommitMsgLanded(ctx, task))
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err) s.pollStartPoRep(ctx, task, ts)
} s.pollStartCommitMsg(ctx, task)
if n != 1 { s.mustPoll(s.pollCommitMsgLanded(ctx, task))
return false, xerrors.Errorf("expected to update 1 row, updated %d", n) }
}
return true, nil return nil
}) }
}
if task.AfterPoRep && len(task.PoRepProof) > 0 && task.TaskCommitMsg == nil && s.pollers[pollerCommitMsg].IsSet() { func (s *SealPoller) pollStartSDR(ctx context.Context, task pollTask) {
s.pollers[pollerCommitMsg].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) { if task.TaskSDR == nil && s.pollers[pollerSDR].IsSet() {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_commit_msg = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_commit_msg is null`, id, task.SpID, task.SectorNumber) s.pollers[pollerSDR].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
if err != nil { n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_sdr = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_sdr is null`, id, task.SpID, task.SectorNumber)
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err) if err != nil {
} return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
if n != 1 { }
return false, xerrors.Errorf("expected to update 1 row, updated %d", n) if n != 1 {
} return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
return true, nil
})
}
if task.AfterCommitMsg && !task.AfterCommitMsgSuccess && s.pollers[pollerCommitMsg].IsSet() {
var execResult []struct {
ExecutedTskCID string `db:"executed_tsk_cid"`
ExecutedTskEpoch int64 `db:"executed_tsk_epoch"`
ExecutedMsgCID string `db:"executed_msg_cid"`
ExecutedRcptExitCode int64 `db:"executed_rcpt_exitcode"`
ExecutedRcptGasUsed int64 `db:"executed_rcpt_gas_used"`
} }
err := s.db.Select(ctx, &execResult, `SELECT executed_tsk_cid, executed_tsk_epoch, executed_msg_cid, executed_rcpt_exitcode, executed_rcpt_gas_used return true, nil
})
}
}
func (s *SealPoller) pollStartSDRTrees(ctx context.Context, task pollTask) {
if task.TaskTreeD == nil && task.TaskTreeC == nil && task.TaskTreeR == nil && s.pollers[pollerTrees].IsSet() && task.AfterSDR {
s.pollers[pollerTrees].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_tree_d = $1, task_id_tree_c = $1, task_id_tree_r = $1
WHERE sp_id = $2 AND sector_number = $3 and after_sdr = true and task_id_tree_d is null and task_id_tree_c is null and task_id_tree_r is null`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
}
func (s *SealPoller) pollStartPrecommitMsg(ctx context.Context, task pollTask) {
if task.TaskPrecommitMsg == nil && task.AfterTreeR && task.AfterTreeD {
s.pollers[pollerPrecommitMsg].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_precommit_msg = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_precommit_msg is null and after_tree_r = true and after_tree_d = true`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
}
func (s *SealPoller) pollPrecommitMsgLanded(ctx context.Context, task pollTask) error {
if task.TaskPrecommitMsg != nil && !task.AfterPrecommitMsgSuccess {
var execResult []struct {
ExecutedTskCID string `db:"executed_tsk_cid"`
ExecutedTskEpoch int64 `db:"executed_tsk_epoch"`
ExecutedMsgCID string `db:"executed_msg_cid"`
ExecutedRcptExitCode int64 `db:"executed_rcpt_exitcode"`
ExecutedRcptGasUsed int64 `db:"executed_rcpt_gas_used"`
}
err := s.db.Select(ctx, &execResult, `SELECT executed_tsk_cid, executed_tsk_epoch, executed_msg_cid, executed_rcpt_exitcode, executed_rcpt_gas_used
FROM sectors_sdr_pipeline
JOIN message_waits ON sectors_sdr_pipeline.precommit_msg_cid = message_waits.signed_message_cid
WHERE sp_id = $1 AND sector_number = $2 AND executed_tsk_epoch is not null`, task.SpID, task.SectorNumber)
if err != nil {
log.Errorw("failed to query message_waits", "error", err)
}
if len(execResult) > 0 {
maddr, err := address.NewIDAddress(uint64(task.SpID))
if err != nil {
return err
}
pci, err := s.api.StateSectorPreCommitInfo(ctx, maddr, abi.SectorNumber(task.SectorNumber), types.EmptyTSK)
if err != nil {
return xerrors.Errorf("get precommit info: %w", err)
}
if pci != nil {
randHeight := pci.PreCommitEpoch + policy.GetPreCommitChallengeDelay()
_, err := s.db.Exec(ctx, `UPDATE sectors_sdr_pipeline SET
seed_epoch = $1, precommit_msg_tsk = $2, after_precommit_msg_success = true
WHERE sp_id = $3 AND sector_number = $4 and seed_epoch is NULL`,
randHeight, execResult[0].ExecutedTskCID, task.SpID, task.SectorNumber)
if err != nil {
return xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
} // todo handle missing precommit info (eg expired precommit)
}
}
return nil
}
func (s *SealPoller) pollStartPoRep(ctx context.Context, task pollTask, ts *types.TipSet) {
if s.pollers[pollerPoRep].IsSet() && task.AfterPrecommitMsgSuccess && task.SeedEpoch != nil && task.TaskPoRep == nil && ts.Height() >= abi.ChainEpoch(*task.SeedEpoch+seedEpochConfidence) {
s.pollers[pollerPoRep].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_porep = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_porep is null`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
}
func (s *SealPoller) pollStartCommitMsg(ctx context.Context, task pollTask) {
if task.AfterPoRep && len(task.PoRepProof) > 0 && task.TaskCommitMsg == nil && s.pollers[pollerCommitMsg].IsSet() {
s.pollers[pollerCommitMsg].Val(ctx)(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) {
n, err := tx.Exec(`UPDATE sectors_sdr_pipeline SET task_id_commit_msg = $1 WHERE sp_id = $2 AND sector_number = $3 and task_id_commit_msg is null`, id, task.SpID, task.SectorNumber)
if err != nil {
return false, xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
if n != 1 {
return false, xerrors.Errorf("expected to update 1 row, updated %d", n)
}
return true, nil
})
}
}
func (s *SealPoller) pollCommitMsgLanded(ctx context.Context, task pollTask) error {
if task.AfterCommitMsg && !task.AfterCommitMsgSuccess && s.pollers[pollerCommitMsg].IsSet() {
var execResult []struct {
ExecutedTskCID string `db:"executed_tsk_cid"`
ExecutedTskEpoch int64 `db:"executed_tsk_epoch"`
ExecutedMsgCID string `db:"executed_msg_cid"`
ExecutedRcptExitCode int64 `db:"executed_rcpt_exitcode"`
ExecutedRcptGasUsed int64 `db:"executed_rcpt_gas_used"`
}
err := s.db.Select(ctx, &execResult, `SELECT executed_tsk_cid, executed_tsk_epoch, executed_msg_cid, executed_rcpt_exitcode, executed_rcpt_gas_used
FROM sectors_sdr_pipeline FROM sectors_sdr_pipeline
JOIN message_waits ON sectors_sdr_pipeline.commit_msg_cid = message_waits.signed_message_cid JOIN message_waits ON sectors_sdr_pipeline.commit_msg_cid = message_waits.signed_message_cid
WHERE sp_id = $1 AND sector_number = $2 AND executed_tsk_epoch is not null`, task.SpID, task.SectorNumber) WHERE sp_id = $1 AND sector_number = $2 AND executed_tsk_epoch is not null`, task.SpID, task.SectorNumber)
if err != nil {
log.Errorw("failed to query message_waits", "error", err)
}
if len(execResult) > 0 {
maddr, err := address.NewIDAddress(uint64(task.SpID))
if err != nil { if err != nil {
log.Errorw("failed to query message_waits", "error", err) return err
} }
if len(execResult) > 0 { si, err := s.api.StateSectorGetInfo(ctx, maddr, abi.SectorNumber(task.SectorNumber), types.EmptyTSK)
maddr, err := address.NewIDAddress(uint64(task.SpID)) if err != nil {
if err != nil { return xerrors.Errorf("get sector info: %w", err)
return err }
}
si, err := s.api.StateSectorGetInfo(ctx, maddr, abi.SectorNumber(task.SectorNumber), types.EmptyTSK) if si == nil {
if err != nil { log.Errorw("todo handle missing sector info (not found after cron)", "sp", task.SpID, "sector", task.SectorNumber, "exec_epoch", execResult[0].ExecutedTskEpoch, "exec_tskcid", execResult[0].ExecutedTskCID, "msg_cid", execResult[0].ExecutedMsgCID)
return xerrors.Errorf("get sector info: %w", err) // todo handdle missing sector info (not found after cron)
} } else {
// yay!
if si == nil { _, err := s.db.Exec(ctx, `UPDATE sectors_sdr_pipeline SET
log.Errorw("todo handle missing sector info (not found after cron)", "sp", task.SpID, "sector", task.SectorNumber, "exec_epoch", execResult[0].ExecutedTskEpoch, "exec_tskcid", execResult[0].ExecutedTskCID, "msg_cid", execResult[0].ExecutedMsgCID)
// todo handdle missing sector info (not found after cron)
} else {
// yay!
_, err := s.db.Exec(ctx, `UPDATE sectors_sdr_pipeline SET
after_commit_msg_success = true, commit_msg_tsk = $1 after_commit_msg_success = true, commit_msg_tsk = $1
WHERE sp_id = $2 AND sector_number = $3 and after_commit_msg_success = false`, WHERE sp_id = $2 AND sector_number = $3 and after_commit_msg_success = false`,
execResult[0].ExecutedTskCID, task.SpID, task.SectorNumber) execResult[0].ExecutedTskCID, task.SpID, task.SectorNumber)
if err != nil { if err != nil {
return xerrors.Errorf("update sectors_sdr_pipeline: %w", err) return xerrors.Errorf("update sectors_sdr_pipeline: %w", err)
}
} }
} }
} }
@ -296,3 +325,9 @@ func (s *SealPoller) poll(ctx context.Context) error {
return nil return nil
} }
func (s *SealPoller) mustPoll(err error) {
if err != nil {
log.Errorw("poller operation failed", "error", err)
}
}