lotus/miner/miner.go
Steven Allen 748d2e82a7 unshare the journal
Motivation:

* Run lotus with the race detector enabled (primary motivation).
* Allow multiple lotus nodes in a process (not a high priority).

Previously, the journal was shared between all lotus instances, but it was
initialized for every new node. This caused safety problems in tests (at a
minimum).

This patch explicitly passes the journal to all services that need it.
2020-10-09 13:23:07 -07:00

543 lines
14 KiB
Go

package miner
import (
"bytes"
"context"
"crypto/rand"
"encoding/binary"
"fmt"
"sync"
"time"
proof0 "github.com/filecoin-project/specs-actors/actors/runtime/proof"
"github.com/filecoin-project/lotus/chain/gen/slashfilter"
"github.com/filecoin-project/go-address"
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/crypto"
lru "github.com/hashicorp/golang-lru"
"github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/gen"
"github.com/filecoin-project/lotus/chain/messagepool/gasguess"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/journal"
logging "github.com/ipfs/go-log/v2"
"go.opencensus.io/trace"
"golang.org/x/xerrors"
)
// log is the package-level logger, created with the name "miner".
var log = logging.Logger("miner")
// Journal event types.
//
// Each constant is an index into Miner.evtTypes, which NewMiner fills with the
// event types it registers on the journal.
const (
	// evtTypeBlockMined indexes the event type registered as
	// ("miner", "block_mined"); the mining loop records it for every block it
	// produces.
	evtTypeBlockMined = iota
)
// waitFunc is invoked by the mining loop before it settles on a mining base.
// It receives the minimum timestamp of the candidate tipset (baseTime) and
// returns:
//   - a callback the loop invokes to report whether a block was mined in this
//     round, the height of that block (zero when none was mined) and any
//     mining error;
//   - a number of null rounds that the loop adds to the mining base (the loop
//     marks this as being for testing);
//   - an error, in which case the loop logs it and picks a candidate again.
type waitFunc func(ctx context.Context, baseTime uint64) (func(bool, abi.ChainEpoch, error), abi.ChainEpoch, error)
// randTimeOffset returns a random duration drawn from the half-open interval
// [-width/2, width-width/2), using crypto/rand as the source of randomness.
//
// It returns 0 when width is not positive, and also when the random source
// cannot be read: the offset is only jitter, so "no jitter" is a safe
// fallback.
func randTimeOffset(width time.Duration) time.Duration {
	// A non-positive width leaves no interval to draw from. It would also make
	// the modulo below divide by zero (width == 0) or wrap around to a huge
	// unsigned modulus (width < 0).
	if width <= 0 {
		return 0
	}

	buf := make([]byte, 8)
	// rand.Read either fills buf completely or reports an error.
	if _, err := rand.Read(buf); err != nil {
		return 0
	}

	// val is in [0, width); shifting by half the width centres it on zero.
	val := time.Duration(binary.BigEndian.Uint64(buf) % uint64(width))
	return val - (width / 2)
}
// NewMiner builds a Miner for the given address. The returned miner is idle
// until Start is called.
//
// It allocates a 10000-entry ARC cache for the heights of blocks already
// mined, registers the ("miner", "block_mined") event type on the journal j,
// and installs a default wait function. It panics if the cache cannot be
// created.
func NewMiner(api api.FullNode, epp gen.WinningPoStProver, addr address.Address, sf *slashfilter.SlashFilter, j journal.Journal) *Miner {
	heights, err := lru.NewARC(10000)
	if err != nil {
		panic(err)
	}

	// Default wait strategy: sleep until the base timestamp plus the
	// propagation delay, shifted by a random offset spread over one second,
	// so that other parent blocks have a chance to arrive. It injects no null
	// rounds and its completion callback does nothing.
	defaultWait := func(ctx context.Context, baseTime uint64) (func(bool, abi.ChainEpoch, error), abi.ChainEpoch, error) {
		wakeAt := time.Unix(int64(baseTime+build.PropagationDelaySecs), 0).Add(randTimeOffset(time.Second))
		build.Clock.Sleep(build.Clock.Until(wakeAt))

		noop := func(bool, abi.ChainEpoch, error) {}
		return noop, 0, nil
	}

	miner := &Miner{
		api:               api,
		epp:               epp,
		address:           addr,
		waitFunc:          defaultWait,
		sf:                sf,
		minedBlockHeights: heights,
		journal:           j,
	}
	miner.evtTypes[evtTypeBlockMined] = j.RegisterEventType("miner", "block_mined")
	return miner
}
// Miner runs the block-production loop for a single miner address. Create one
// with NewMiner, then control the loop with Start and Stop.
type Miner struct {
	// api is the full-node API used for chain, beacon, state, wallet and
	// message-pool queries, and for creating and submitting blocks.
	api api.FullNode
	// epp computes the winning PoSt proof for a block (see mineOne).
	epp gen.WinningPoStProver
	// lk is held by Address, Start, Stop and GetBestMiningCandidate.
	lk sync.Mutex
	// address is the miner address blocks are produced for.
	address address.Address
	// stop is created by Start and closed by Stop to ask the mining loop to
	// exit; the loop resets it to nil on its way out.
	stop chan struct{}
	// stopping is created by Stop and closed by the mining loop once it has
	// observed the stop signal.
	stopping chan struct{}
	// waitFunc is called by the mining loop before it commits to a mining
	// base; NewMiner installs a default implementation.
	waitFunc waitFunc
	// lastWork is the mining base most recently returned by
	// GetBestMiningCandidate.
	lastWork *MiningBase
	// sf is consulted for every mined block before it is submitted.
	sf *slashfilter.SlashFilter
	// minedBlockHeights remembers the heights at which this miner already
	// submitted a block, keyed by the decimal height string.
	minedBlockHeights *lru.ARCCache
	// evtTypes holds the registered journal event types, indexed by the
	// evtType* constants.
	evtTypes [1]journal.EventType
	// journal is where mining events are recorded.
	journal journal.Journal
}
// Address returns the address this miner produces blocks for. The field is
// read while holding the miner's lock.
func (m *Miner) Address() address.Address {
	m.lk.Lock()
	addr := m.address
	m.lk.Unlock()

	return addr
}
// Start launches the mining loop in a new goroutine.
//
// It returns an error if the loop is already running. The ctx argument is not
// passed on to the loop, which runs under its own context.TODO() context.
func (m *Miner) Start(ctx context.Context) error {
	m.lk.Lock()
	defer m.lk.Unlock()

	if alreadyRunning := m.stop != nil; alreadyRunning {
		return fmt.Errorf("miner already started")
	}

	// A non-nil stop channel marks the miner as running; Stop closes it.
	stopSignal := make(chan struct{})
	m.stop = stopSignal

	go m.mine(context.TODO())
	return nil
}
// Stop asks the mining loop to exit and waits until the loop acknowledges the
// request or ctx is done, whichever happens first.
//
// It returns an error if the miner is not running (never started, or already
// stopped), and ctx.Err() if ctx finishes before the loop has acknowledged the
// shutdown. Calling Stop again while a shutdown is still in flight is safe:
// the later call waits for the same acknowledgement instead of signalling a
// second time.
func (m *Miner) Stop(ctx context.Context) error {
	m.lk.Lock()

	if m.stop == nil {
		// Closing a nil channel panics, so report the condition instead.
		m.lk.Unlock()
		return fmt.Errorf("miner not started")
	}

	if m.stopping == nil {
		// First stop request for this run. The acknowledgement channel must
		// exist before the stop signal is sent, because the mining loop reads
		// m.stopping as soon as it observes that m.stop was closed.
		m.stopping = make(chan struct{})
		close(m.stop)
	}
	// Otherwise a shutdown is already in progress; closing m.stop a second
	// time would panic, so only wait on the existing acknowledgement channel.
	stopping := m.stopping

	m.lk.Unlock()

	select {
	case <-stopping:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// niceSleep blocks for duration d unless the miner is asked to stop first.
// It reports true when the full duration elapsed and false when the sleep was
// cut short by the stop signal.
func (m *Miner) niceSleep(d time.Duration) bool {
	elapsed := build.Clock.After(d)

	select {
	case <-m.stop:
		log.Infow("received interrupt while trying to sleep in mining cycle")
		return false
	case <-elapsed:
		return true
	}
}
// mine is the mining loop; Start runs it in its own goroutine. Each pass of
// the outer loop:
//
//  1. exits if the stop channel has been closed, closing m.stopping so that
//     Stop can return;
//  2. selects a mining base, waiting via m.waitFunc and for the beacon entry
//     of the next round;
//  3. calls mineOne on that base;
//  4. if a block was produced: records a journal event, waits until the
//     block's timestamp, runs the slash filter and the duplicate-height check,
//     and submits the block;
//  5. otherwise bumps the null-round count of the base and sleeps until the
//     next round plus the propagation delay.
//
// NOTE(review): m.stop and m.stopping are read and written here without
// holding m.lk, while Start and Stop access them under the lock.
func (m *Miner) mine(ctx context.Context) {
	ctx, span := trace.StartSpan(ctx, "/mine")
	defer span.End()

	// lastBase is the base used by the previous successful mineOne call; it is
	// used to detect that the candidate has not changed since then.
	var lastBase MiningBase
minerLoop:
	for {
		// Non-blocking check for the stop signal.
		select {
		case <-m.stop:
			// Acknowledge the stop request: reset the channels and close
			// m.stopping, which Stop is waiting on.
			stopping := m.stopping
			m.stop = nil
			m.stopping = nil
			close(stopping)
			return

		default:
		}

		var base *MiningBase
		var onDone func(bool, abi.ChainEpoch, error)
		var injectNulls abi.ChainEpoch

		// Keep picking candidates until two consecutive picks agree on height
		// and null rounds, i.e. the candidate did not move while we waited.
		for {
			prebase, err := m.GetBestMiningCandidate(ctx)
			if err != nil {
				log.Errorf("failed to get best mining candidate: %s", err)
				if !m.niceSleep(time.Second * 5) {
					// Interrupted: go back to the stop check at the top.
					continue minerLoop
				}
				continue
			}

			if base != nil && base.TipSet.Height() == prebase.TipSet.Height() && base.NullRounds == prebase.NullRounds {
				base = prebase
				break
			}
			if base != nil {
				// The candidate changed while waiting; report the previous
				// wait as "nothing mined" before waiting again.
				onDone(false, 0, nil)
			}

			// TODO: need to change the orchestration here. the problem is that
			// we are waiting *after* we enter this loop and select a mining
			// candidate, which is almost certain to change in multiminer
			// tests. Instead, we should block before entering the loop, so
			// that when the test 'MineOne' function is triggered, we pull our
			// best mining candidate at that time.

			// Wait until propagation delay period after block we plan to mine on
			onDone, injectNulls, err = m.waitFunc(ctx, prebase.TipSet.MinTimestamp())
			if err != nil {
				log.Error(err)
				continue
			}

			// just wait for the beacon entry to become available before we select our final mining base
			_, err = m.api.BeaconGetEntry(ctx, prebase.TipSet.Height()+prebase.NullRounds+1)
			if err != nil {
				log.Errorf("failed getting beacon entry: %s", err)
				if !m.niceSleep(time.Second) {
					continue minerLoop
				}
				continue
			}

			base = prebase
		}

		base.NullRounds += injectNulls // testing

		// Same base as the last round we mined on: wait one block delay
		// instead of mining on it again.
		if base.TipSet.Equals(lastBase.TipSet) && lastBase.NullRounds == base.NullRounds {
			log.Warnf("BestMiningCandidate from the previous round: %s (nulls:%d)", lastBase.TipSet.Cids(), lastBase.NullRounds)
			if !m.niceSleep(time.Duration(build.BlockDelaySecs) * time.Second) {
				continue minerLoop
			}
			continue
		}

		b, err := m.mineOne(ctx, base)
		if err != nil {
			log.Errorf("mining block failed: %+v", err)
			// NOTE(review): when this sleep is interrupted, onDone is not
			// called for this round — confirm that is intended.
			if !m.niceSleep(time.Second) {
				continue minerLoop
			}
			onDone(false, 0, err)
			continue
		}
		lastBase = *base

		// h stays zero when no block was produced.
		var h abi.ChainEpoch
		if b != nil {
			h = b.Header.Height
		}
		onDone(b != nil, h, nil)

		if b != nil {
			m.journal.RecordEvent(m.evtTypes[evtTypeBlockMined], func() interface{} {
				return map[string]interface{}{
					"parents":   base.TipSet.Cids(),
					"nulls":     base.NullRounds,
					"epoch":     b.Header.Height,
					"timestamp": b.Header.Timestamp,
					"cid":       b.Header.Cid(),
				}
			})

			btime := time.Unix(int64(b.Header.Timestamp), 0)
			now := build.Clock.Now()
			switch {
			// NOTE(review): == on time.Time compares the struct values rather
			// than the instants; btime.Equal(now) may be what is intended —
			// confirm.
			case btime == now:
				// block timestamp is perfectly aligned with time.
			case btime.After(now):
				// The block is timestamped in the future: hold it back until
				// then. If interrupted, still wait out the remainder so the
				// block is sent before the loop shuts down.
				if !m.niceSleep(build.Clock.Until(btime)) {
					log.Warnf("received interrupt while waiting to broadcast block, will shutdown after block is sent out")
					build.Clock.Sleep(build.Clock.Until(btime))
				}
			default:
				log.Warnw("mined block in the past",
					"block-time", btime, "time", build.Clock.Now(), "difference", build.Clock.Since(btime))
			}

			// Do not submit a block the slash filter rejects.
			if err := m.sf.MinedBlock(b.Header, base.TipSet.Height()+base.NullRounds); err != nil {
				log.Errorf("<!!> SLASH FILTER ERROR: %s", err)
				continue
			}

			// Do not submit a second block at a height we already mined at.
			blkKey := fmt.Sprintf("%d", b.Header.Height)
			if _, ok := m.minedBlockHeights.Get(blkKey); ok {
				log.Warnw("Created a block at the same height as another block we've created", "height", b.Header.Height, "miner", b.Header.Miner, "parents", b.Header.Parents)
				continue
			}

			m.minedBlockHeights.Add(blkKey, true)

			if err := m.api.SyncSubmitBlock(ctx, b); err != nil {
				log.Errorf("failed to submit newly mined block: %s", err)
			}
		} else {
			// base is the pointer handed out by GetBestMiningCandidate, which
			// also keeps it in m.lastWork, so this increment is seen by the
			// next candidate selection.
			base.NullRounds++

			// Wait until the next epoch, plus the propagation delay, so a new tipset
			// has enough time to form.
			//
			// See: https://github.com/filecoin-project/lotus/issues/1845
			nextRound := time.Unix(int64(base.TipSet.MinTimestamp()+build.BlockDelaySecs*uint64(base.NullRounds))+int64(build.PropagationDelaySecs), 0)

			select {
			case <-build.Clock.After(build.Clock.Until(nextRound)):
			case <-m.stop:
				// Same shutdown acknowledgement as at the top of the loop.
				stopping := m.stopping
				m.stop = nil
				m.stopping = nil
				close(stopping)
				return
			}
		}
	}
}
// MiningBase describes what the miner builds its next block on.
type MiningBase struct {
	// TipSet is the parent tipset of the block to be mined.
	TipSet *types.TipSet
	// NullRounds is the number of rounds skipped on top of TipSet; the block
	// is mined for epoch TipSet.Height() + NullRounds + 1.
	NullRounds abi.ChainEpoch
}
// GetBestMiningCandidate returns the base the miner should mine on next and
// remembers it in m.lastWork.
//
// The previously returned base is kept when the chain head is still the same
// tipset, or when the new head is not strictly heavier than it. Otherwise a
// fresh base with zero null rounds is created from the current head. Errors
// from the node API are returned as-is; if the weight of the remembered base
// cannot be fetched, the remembered base is discarded.
func (m *Miner) GetBestMiningCandidate(ctx context.Context) (*MiningBase, error) {
	m.lk.Lock()
	defer m.lk.Unlock()

	head, err := m.api.ChainHead(ctx)
	if err != nil {
		return nil, err
	}

	if prev := m.lastWork; prev != nil {
		// Head unchanged: nothing to compare.
		if prev.TipSet.Equals(head) {
			return prev, nil
		}

		headWeight, err := m.api.ChainTipSetWeight(ctx, head.Key())
		if err != nil {
			return nil, err
		}

		prevWeight, err := m.api.ChainTipSetWeight(ctx, prev.TipSet.Key())
		if err != nil {
			// The remembered base can no longer be weighed; forget it.
			m.lastWork = nil
			return nil, err
		}

		// Only switch to the new head when it is strictly heavier.
		if types.BigCmp(headWeight, prevWeight) <= 0 {
			return prev, nil
		}
	}

	fresh := &MiningBase{TipSet: head}
	m.lastWork = fresh
	return fresh, nil
}
// mineOne attempts to mine a single block, and does so synchronously, if and
// only if we are eligible to mine.
//
// {hint/landmark}: This method coordinates all the steps involved in mining a
// block, including the decision of whether to mine at all, depending on
// whether we win the round or not.
//
// This method does the following:
//
//  1. Fetches the mining base info for the round following base; gives up if
//     there is none or if the miner is not eligible to mine.
//  2. Picks the beacon entry to draw randomness from: the last of the new
//     beacon entries if there are any, otherwise the previous entry.
//  3. Computes the ticket for the round.
//  4. Checks whether we won the round; gives up if not.
//  5. Draws the winning PoSt challenge seed and computes the proof.
//  6. Selects messages from the message pool.
//  7. Asks the node to create the block.
//  8. Logs the result, with a per-step timing breakdown if the whole process
//     took longer than one block delay.
//
// It returns (nil, nil) when no block is mined this round (no base info, not
// eligible, or not a winner), (nil, err) when any step fails, and the new
// block otherwise.
func (m *Miner) mineOne(ctx context.Context, base *MiningBase) (*types.BlockMsg, error) {
	log.Debugw("attempting to mine a block", "tipset", types.LogCids(base.TipSet.Cids()))
	start := build.Clock.Now()

	round := base.TipSet.Height() + base.NullRounds + 1

	mbi, err := m.api.MinerGetBaseInfo(ctx, m.address, round, base.TipSet.Key())
	if err != nil {
		return nil, xerrors.Errorf("failed to get mining base info: %w", err)
	}
	if mbi == nil {
		return nil, nil
	}
	if !mbi.EligibleForMining {
		// slashed or just have no power yet
		return nil, nil
	}

	tMBI := build.Clock.Now()

	beaconPrev := mbi.PrevBeaconEntry

	tDrand := build.Clock.Now()
	bvals := mbi.BeaconEntries

	tPowercheck := build.Clock.Now()

	// Signed arithmetic: the base timestamp may be ahead of the local clock,
	// and an unsigned subtraction would wrap around to a huge value.
	log.Infof("Time delta between now and our mining base: %ds (nulls: %d)", build.Clock.Now().Unix()-int64(base.TipSet.MinTimestamp()), base.NullRounds)

	// Prefer the newest beacon entry of this round; fall back to the previous
	// entry when the round brought no new ones.
	rbase := beaconPrev
	if len(bvals) > 0 {
		rbase = bvals[len(bvals)-1]
	}

	ticket, err := m.computeTicket(ctx, &rbase, base)
	if err != nil {
		return nil, xerrors.Errorf("scratching ticket failed: %w", err)
	}

	winner, err := gen.IsRoundWinner(ctx, base.TipSet, round, m.address, rbase, mbi, m.api)
	if err != nil {
		return nil, xerrors.Errorf("failed to check if we win next round: %w", err)
	}

	if winner == nil {
		return nil, nil
	}

	tTicket := build.Clock.Now()

	buf := new(bytes.Buffer)
	if err := m.address.MarshalCBOR(buf); err != nil {
		return nil, xerrors.Errorf("failed to marshal miner address: %w", err)
	}

	// Named seed rather than rand so the imported crypto/rand package is not
	// shadowed inside this function.
	seed, err := store.DrawRandomness(rbase.Data, crypto.DomainSeparationTag_WinningPoStChallengeSeed, round, buf.Bytes())
	if err != nil {
		return nil, xerrors.Errorf("failed to get randomness for winning post: %w", err)
	}

	prand := abi.PoStRandomness(seed)

	tSeed := build.Clock.Now()

	postProof, err := m.epp.ComputeProof(ctx, mbi.Sectors, prand)
	if err != nil {
		return nil, xerrors.Errorf("failed to compute winning post proof: %w", err)
	}

	// get pending messages early,
	// using the caller's context so the call belongs to the same trace as the
	// rest of this round.
	msgs, err := m.api.MpoolSelect(ctx, base.TipSet.Key(), ticket.Quality())
	if err != nil {
		return nil, xerrors.Errorf("failed to select messages for block: %w", err)
	}

	tPending := build.Clock.Now()

	// TODO: winning post proof
	b, err := m.createBlock(base, m.address, ticket, winner, bvals, postProof, msgs)
	if err != nil {
		return nil, xerrors.Errorf("failed to create block: %w", err)
	}

	tCreateBlock := build.Clock.Now()
	dur := tCreateBlock.Sub(start)
	parentMiners := make([]address.Address, len(base.TipSet.Blocks()))
	for i, header := range base.TipSet.Blocks() {
		parentMiners[i] = header.Miner
	}
	log.Infow("mined new block", "cid", b.Cid(), "height", b.Header.Height, "miner", b.Header.Miner, "parents", parentMiners, "took", dur)
	if dur > time.Second*time.Duration(build.BlockDelaySecs) {
		log.Warnw("CAUTION: block production took longer than the block delay. Your computer may not be fast enough to keep up",
			"tMinerBaseInfo ", tMBI.Sub(start),
			"tDrand ", tDrand.Sub(tMBI),
			"tPowercheck ", tPowercheck.Sub(tDrand),
			"tTicket ", tTicket.Sub(tPowercheck),
			"tSeed ", tSeed.Sub(tTicket),
			"tPending ", tPending.Sub(tSeed),
			"tCreateBlock ", tCreateBlock.Sub(tPending))
	}

	return b, nil
}
// computeTicket produces the ticket for the round that follows base.
//
// The VRF input is drawn from the beacon entry brand using the ticket
// production domain tag, the round minus the ticket randomness lookback, and
// an entropy buffer made of the CBOR-encoded miner address (followed, for
// rounds after the Smoke upgrade height, by the VRF proof of the base tipset's
// minimum ticket). The input is signed through the node's wallet API with the
// key resolved from the miner's worker address.
func (m *Miner) computeTicket(ctx context.Context, brand *types.BeaconEntry, base *MiningBase) (*types.Ticket, error) {
	minerInfo, err := m.api.StateMinerInfo(ctx, m.address, types.EmptyTSK)
	if err != nil {
		return nil, err
	}

	workerKey, err := m.api.StateAccountKey(ctx, minerInfo.Worker, types.EmptyTSK)
	if err != nil {
		return nil, err
	}

	var entropy bytes.Buffer
	if err := m.address.MarshalCBOR(&entropy); err != nil {
		return nil, xerrors.Errorf("failed to marshal address to cbor: %w", err)
	}

	epoch := base.TipSet.Height() + base.NullRounds + 1
	if epoch > build.UpgradeSmokeHeight {
		entropy.Write(base.TipSet.MinTicket().VRFProof)
	}

	vrfInput, err := store.DrawRandomness(brand.Data, crypto.DomainSeparationTag_TicketProduction, epoch-build.TicketRandomnessLookback, entropy.Bytes())
	if err != nil {
		return nil, err
	}

	proof, err := gen.ComputeVRF(ctx, m.api.WalletSign, workerKey, vrfInput)
	if err != nil {
		return nil, err
	}

	ticket := &types.Ticket{VRFProof: proof}
	return ticket, nil
}
// createBlock fills in a block template for the round following base and asks
// the node to turn it into a block.
//
// The block's timestamp is the base tipset's minimum timestamp plus one block
// delay for every round elapsed (the null rounds plus the round being mined);
// its epoch is the base height plus that same number of rounds.
func (m *Miner) createBlock(base *MiningBase, addr address.Address, ticket *types.Ticket,
	eproof *types.ElectionProof, bvals []types.BeaconEntry, wpostProof []proof0.PoStProof, msgs []*types.SignedMessage) (*types.BlockMsg, error) {
	timestamp := base.TipSet.MinTimestamp() + build.BlockDelaySecs*(uint64(base.NullRounds)+1)
	epoch := base.TipSet.Height() + base.NullRounds + 1

	tmpl := &api.BlockTemplate{
		Miner:            addr,
		Parents:          base.TipSet.Key(),
		Ticket:           ticket,
		Eproof:           eproof,
		BeaconValues:     bvals,
		Messages:         msgs,
		Epoch:            epoch,
		Timestamp:        timestamp,
		WinningPoStProof: wpostProof,
	}

	// Open question from the original author: why return the block at all,
	// when the API call could submit it for us?
	return m.api.MinerCreateBlock(context.TODO(), tmpl)
}
// actCacheEntry is one cached result of an actor lookup: the actor returned
// by the fallback together with the error it returned.
type actCacheEntry struct {
	// act is the actor returned by the lookup.
	act *types.Actor
	// err is the error returned by the lookup; it is cached alongside act.
	err error
}
// cachedActorLookup wraps an actor lookup function and caches its results for
// a single tipset key.
type cachedActorLookup struct {
	// tsk is the only tipset key for which results are cached.
	tsk types.TipSetKey
	// cache maps an address to the lookup result recorded for tsk. It must be
	// non-nil before StateGetActor is called with tsk, because StateGetActor
	// writes to it.
	cache map[address.Address]actCacheEntry
	// fallback performs the actual lookup on a cache miss or for any other
	// tipset key.
	fallback gasguess.ActorLookup
}
// StateGetActor looks up actor a at tipset key tsk.
//
// Lookups for any key other than c.tsk go straight to the fallback. Lookups
// for c.tsk are answered from the cache when possible; on a miss the
// fallback's result, including its error, is stored before being returned.
func (c *cachedActorLookup) StateGetActor(ctx context.Context, a address.Address, tsk types.TipSetKey) (*types.Actor, error) {
	// Only results for the configured tipset key are cached.
	if c.tsk != tsk {
		return c.fallback(ctx, a, tsk)
	}

	if hit, ok := c.cache[a]; ok {
		return hit.act, hit.err
	}

	act, err := c.fallback(ctx, a, tsk)
	c.cache[a] = actCacheEntry{act: act, err: err}
	return act, err
}
// ActorLookup is the signature of a function that returns the actor stored at
// an address for a given tipset key.
type ActorLookup func(context.Context, address.Address, types.TipSetKey) (*types.Actor, error)