lotus/chain/sub/incoming.go

package sub

import (
	"bytes"
	"context"
	"encoding/binary"
	"sync"
	"time"

	lru "github.com/hashicorp/golang-lru"
	blocks "github.com/ipfs/go-block-format"
	bserv "github.com/ipfs/go-blockservice"
	"github.com/ipfs/go-cid"
	logging "github.com/ipfs/go-log/v2"
	pubsub "github.com/libp2p/go-libp2p-pubsub"
	"github.com/libp2p/go-libp2p/core/connmgr"
	"github.com/libp2p/go-libp2p/core/peer"
	"go.opencensus.io/stats"
	"go.opencensus.io/tag"
	"golang.org/x/xerrors"

	"github.com/filecoin-project/go-address"
	"github.com/filecoin-project/go-legs/dtsync"

	"github.com/filecoin-project/lotus/build"
	"github.com/filecoin-project/lotus/chain"
	"github.com/filecoin-project/lotus/chain/consensus"
	"github.com/filecoin-project/lotus/chain/messagepool"
	"github.com/filecoin-project/lotus/chain/store"
	"github.com/filecoin-project/lotus/chain/sub/ratelimit"
	"github.com/filecoin-project/lotus/chain/types"
	"github.com/filecoin-project/lotus/metrics"
	"github.com/filecoin-project/lotus/node/impl/client"
	"github.com/filecoin-project/lotus/node/impl/full"
)

var log = logging.Logger("sub")

var msgCidPrefix = cid.Prefix{
	Version:  1,
	Codec:    cid.DagCBOR,
	MhType:   client.DefaultHashFunction,
	MhLength: 32,
}

func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *chain.Syncer, bs bserv.BlockService, cmgr connmgr.ConnManager) {
	// Timeout after (block time + propagation delay). This is useless at
	// this point.
	timeout := time.Duration(build.BlockDelaySecs+build.PropagationDelaySecs) * time.Second

	for {
		msg, err := bsub.Next(ctx)
		if err != nil {
			if ctx.Err() != nil {
				log.Warn("quitting HandleIncomingBlocks loop")
				return
			}
			log.Error("error from block subscription: ", err)
			continue
		}

		blk, ok := msg.ValidatorData.(*types.BlockMsg)
		if !ok {
			log.Warnf("pubsub block validator passed on wrong type: %#v", msg.ValidatorData)
			return
		}

		src := msg.GetFrom()

		go func() {
			ctx, cancel := context.WithTimeout(ctx, timeout)
			defer cancel()

			// NOTE: we could also share a single session between
			// all requests but that may have other consequences.
			ses := bserv.NewSession(ctx, bs)

			start := build.Clock.Now()
			log.Debug("about to fetch messages for block from pubsub")
			bmsgs, err := FetchMessagesByCids(ctx, ses, blk.BlsMessages)
			if err != nil {
				log.Errorf("failed to fetch all bls messages for block received over pubsub: %s; source: %s", err, src)
				return
			}

			smsgs, err := FetchSignedMessagesByCids(ctx, ses, blk.SecpkMessages)
			if err != nil {
				log.Errorf("failed to fetch all secpk messages for block received over pubsub: %s; source: %s", err, src)
				return
			}

			took := build.Clock.Since(start)
			log.Debugw("new block over pubsub", "cid", blk.Header.Cid(), "source", msg.GetFrom(), "msgfetch", took)
			if took > 3*time.Second {
				log.Warnw("Slow msg fetch", "cid", blk.Header.Cid(), "source", msg.GetFrom(), "msgfetch", took)
			}
			if delay := build.Clock.Now().Unix() - int64(blk.Header.Timestamp); delay > 5 {
				_ = stats.RecordWithTags(ctx,
					[]tag.Mutator{tag.Insert(metrics.MinerID, blk.Header.Miner.String())},
					metrics.BlockDelay.M(delay),
				)
				log.Warnw("received block with large delay from miner", "block", blk.Cid(), "delay", delay, "miner", blk.Header.Miner)
			}

			if s.InformNewBlock(msg.ReceivedFrom, &types.FullBlock{
				Header:        blk.Header,
				BlsMessages:   bmsgs,
				SecpkMessages: smsgs,
			}) {
				cmgr.TagPeer(msg.ReceivedFrom, "blkprop", 5)
			}
		}()
	}
}

func FetchMessagesByCids(
	ctx context.Context,
	bserv bserv.BlockGetter,
	cids []cid.Cid,
) ([]*types.Message, error) {
	out := make([]*types.Message, len(cids))

	err := fetchCids(ctx, bserv, cids, func(i int, b blocks.Block) error {
		msg, err := types.DecodeMessage(b.RawData())
		if err != nil {
			return err
		}

		out[i] = msg
		return nil
	})
	if err != nil {
		return nil, err
	}
	return out, nil
}

// FIXME: Duplicate of above.
func FetchSignedMessagesByCids(
	ctx context.Context,
	bserv bserv.BlockGetter,
	cids []cid.Cid,
) ([]*types.SignedMessage, error) {
	out := make([]*types.SignedMessage, len(cids))

	err := fetchCids(ctx, bserv, cids, func(i int, b blocks.Block) error {
		smsg, err := types.DecodeSignedMessage(b.RawData())
		if err != nil {
			return err
		}

		out[i] = smsg
		return nil
	})
	if err != nil {
		return nil, err
	}
	return out, nil
}

// Fetch `cids` from the block service, apply `cb` on each of them. Used
//  by the fetch message functions above.
// We check that each block is received only once and we do not received
//  blocks we did not request.
func fetchCids(
	ctx context.Context,
	bserv bserv.BlockGetter,
	cids []cid.Cid,
	cb func(int, blocks.Block) error,
) error {

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	cidIndex := make(map[cid.Cid]int)
	for i, c := range cids {
		if c.Prefix() != msgCidPrefix {
			return xerrors.Errorf("invalid msg CID: %s", c)
		}
		cidIndex[c] = i
	}
	if len(cids) != len(cidIndex) {
		return xerrors.Errorf("duplicate CIDs in fetchCids input")
	}

	for block := range bserv.GetBlocks(ctx, cids) {
		ix, ok := cidIndex[block.Cid()]
		if !ok {
			// Ignore duplicate/unexpected blocks. This shouldn't
			// happen, but we can be safe.
			log.Errorw("received duplicate/unexpected block when syncing", "cid", block.Cid())
			continue
		}

		// Record that we've received the block.
		delete(cidIndex, block.Cid())

		if err := cb(ix, block); err != nil {
			return err
		}
	}

	if len(cidIndex) > 0 {
		err := ctx.Err()
		if err == nil {
			err = xerrors.Errorf("failed to fetch %d messages for unknown reasons", len(cidIndex))
		}
		return err
	}

	return nil
}

type BlockValidator struct {
	self peer.ID

	peers *lru.TwoQueueCache

	killThresh int

	recvBlocks *blockReceiptCache

	blacklist func(peer.ID)

	// necessary for block validation
	chain     *store.ChainStore
	consensus consensus.Consensus
}

func NewBlockValidator(self peer.ID, chain *store.ChainStore, cns consensus.Consensus, blacklist func(peer.ID)) *BlockValidator {
	p, _ := lru.New2Q(4096)
	return &BlockValidator{
		self:       self,
		peers:      p,
		killThresh: 10,
		blacklist:  blacklist,
		recvBlocks: newBlockReceiptCache(),
		chain:      chain,
		consensus:  cns,
	}
}

func (bv *BlockValidator) flagPeer(p peer.ID) {
	v, ok := bv.peers.Get(p)
	if !ok {
		bv.peers.Add(p, int(1))
		return
	}

	val := v.(int)

	if val >= bv.killThresh {
		log.Warnf("blacklisting peer %s", p)
		bv.blacklist(p)
		return
	}

	bv.peers.Add(p, v.(int)+1)
}

func (bv *BlockValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) (res pubsub.ValidationResult) {
	defer func() {
		if rerr := recover(); rerr != nil {
			err := xerrors.Errorf("validate block: %s", rerr)
			recordFailure(ctx, metrics.BlockValidationFailure, err.Error())
			bv.flagPeer(pid)
			res = pubsub.ValidationReject
			return
		}
	}()

	var what string
	res, what = bv.consensus.ValidateBlockPubsub(ctx, pid == bv.self, msg)
	if res == pubsub.ValidationAccept {
		// it's a good block! make sure we've only seen it once
		if count := bv.recvBlocks.add(msg.ValidatorData.(*types.BlockMsg).Cid()); count > 0 {
			if pid == bv.self {
				log.Warnf("local block has been seen %d times; ignoring", count)
			}

			// TODO: once these changes propagate to the network, we can consider
			// dropping peers who send us the same block multiple times
			return pubsub.ValidationIgnore
		}
	} else {
		recordFailure(ctx, metrics.BlockValidationFailure, what)
	}

	return res
}

type blockReceiptCache struct {
	blocks *lru.TwoQueueCache
}

func newBlockReceiptCache() *blockReceiptCache {
	c, _ := lru.New2Q(8192)

	return &blockReceiptCache{
		blocks: c,
	}
}

func (brc *blockReceiptCache) add(bcid cid.Cid) int {
	val, ok := brc.blocks.Get(bcid)
	if !ok {
		brc.blocks.Add(bcid, int(1))
		return 0
	}

	brc.blocks.Add(bcid, val.(int)+1)
	return val.(int)
}

type MessageValidator struct {
	self  peer.ID
	mpool *messagepool.MessagePool
}

func NewMessageValidator(self peer.ID, mp *messagepool.MessagePool) *MessageValidator {
	return &MessageValidator{self: self, mpool: mp}
}

func (mv *MessageValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) pubsub.ValidationResult {
	if pid == mv.self {
		return mv.validateLocalMessage(ctx, msg)
	}

	start := time.Now()
	defer func() {
		ms := time.Now().Sub(start).Microseconds()
		stats.Record(ctx, metrics.MessageValidationDuration.M(float64(ms)/1000))
	}()

	stats.Record(ctx, metrics.MessageReceived.M(1))
	m, err := types.DecodeSignedMessage(msg.Message.GetData())
	if err != nil {
		log.Warnf("failed to decode incoming message: %s", err)
		ctx, _ = tag.New(ctx, tag.Insert(metrics.FailureType, "decode"))
		stats.Record(ctx, metrics.MessageValidationFailure.M(1))
		return pubsub.ValidationReject
	}

	if err := mv.mpool.Add(ctx, m); err != nil {
		log.Debugf("failed to add message from network to message pool (From: %s, To: %s, Nonce: %d, Value: %s): %s", m.Message.From, m.Message.To, m.Message.Nonce, types.FIL(m.Message.Value), err)
		ctx, _ = tag.New(
			ctx,
			tag.Upsert(metrics.Local, "false"),
		)
		recordFailure(ctx, metrics.MessageValidationFailure, "add")
		switch {
		case xerrors.Is(err, messagepool.ErrSoftValidationFailure):
			fallthrough
		case xerrors.Is(err, messagepool.ErrRBFTooLowPremium):
			fallthrough
		case xerrors.Is(err, messagepool.ErrTooManyPendingMessages):
			fallthrough
		case xerrors.Is(err, messagepool.ErrNonceGap):
			fallthrough
		case xerrors.Is(err, messagepool.ErrNonceTooLow):
			return pubsub.ValidationIgnore
		default:
			return pubsub.ValidationReject
		}
	}

	ctx, _ = tag.New(
		ctx,
		tag.Upsert(metrics.MsgValid, "true"),
	)

	stats.Record(ctx, metrics.MessageValidationSuccess.M(1))
	return pubsub.ValidationAccept
}

func (mv *MessageValidator) validateLocalMessage(ctx context.Context, msg *pubsub.Message) pubsub.ValidationResult {
	ctx, _ = tag.New(
		ctx,
		tag.Upsert(metrics.Local, "true"),
	)

	start := time.Now()
	defer func() {
		ms := time.Now().Sub(start).Microseconds()
		stats.Record(ctx, metrics.MessageValidationDuration.M(float64(ms)/1000))
	}()

	// do some lightweight validation
	stats.Record(ctx, metrics.MessagePublished.M(1))

	m, err := types.DecodeSignedMessage(msg.Message.GetData())
	if err != nil {
		log.Warnf("failed to decode local message: %s", err)
		recordFailure(ctx, metrics.MessageValidationFailure, "decode")
		return pubsub.ValidationIgnore
	}

	if m.Size() > messagepool.MaxMessageSize {
		log.Warnf("local message is too large! (%dB)", m.Size())
		recordFailure(ctx, metrics.MessageValidationFailure, "oversize")
		return pubsub.ValidationIgnore
	}

	if m.Message.To == address.Undef {
		log.Warn("local message has invalid destination address")
		recordFailure(ctx, metrics.MessageValidationFailure, "undef-addr")
		return pubsub.ValidationIgnore
	}

	if !m.Message.Value.LessThan(types.TotalFilecoinInt) {
		log.Warnf("local messages has too high value: %s", m.Message.Value)
		recordFailure(ctx, metrics.MessageValidationFailure, "value-too-high")
		return pubsub.ValidationIgnore
	}

	if err := mv.mpool.VerifyMsgSig(m); err != nil {
		log.Warnf("signature verification failed for local message: %s", err)
		recordFailure(ctx, metrics.MessageValidationFailure, "verify-sig")
		return pubsub.ValidationIgnore
	}

	ctx, _ = tag.New(
		ctx,
		tag.Upsert(metrics.MsgValid, "true"),
	)

	stats.Record(ctx, metrics.MessageValidationSuccess.M(1))
	return pubsub.ValidationAccept
}

func HandleIncomingMessages(ctx context.Context, mpool *messagepool.MessagePool, msub *pubsub.Subscription) {
	for {
		_, err := msub.Next(ctx)
		if err != nil {
			log.Warn("error from message subscription: ", err)
			if ctx.Err() != nil {
				log.Warn("quitting HandleIncomingMessages loop")
				return
			}
			continue
		}

		// Do nothing... everything happens in validate
	}
}

func recordFailure(ctx context.Context, metric *stats.Int64Measure, failureType string) {
	ctx, _ = tag.New(
		ctx,
		tag.Upsert(metrics.FailureType, failureType),
	)
	stats.Record(ctx, metric.M(1))
}

type peerMsgInfo struct {
	peerID    peer.ID
	lastCid   cid.Cid
	lastSeqno uint64
	rateLimit *ratelimit.Window
	mutex     sync.Mutex
}

type IndexerMessageValidator struct {
	self peer.ID

	peerCache *lru.TwoQueueCache
	chainApi  full.ChainModuleAPI
	stateApi  full.StateModuleAPI
}

func NewIndexerMessageValidator(self peer.ID, chainApi full.ChainModuleAPI, stateApi full.StateModuleAPI) *IndexerMessageValidator {
	peerCache, _ := lru.New2Q(8192)

	return &IndexerMessageValidator{
		self:      self,
		peerCache: peerCache,
		chainApi:  chainApi,
		stateApi:  stateApi,
	}
}

func (v *IndexerMessageValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) pubsub.ValidationResult {
	// This chain-node should not be publishing its own messages.  These are
	// relayed from market-nodes.  If a node appears to be local, reject it.
	if pid == v.self {
		log.Debug("ignoring indexer message from self")
		stats.Record(ctx, metrics.IndexerMessageValidationFailure.M(1))
		return pubsub.ValidationIgnore
	}
	originPeer := msg.GetFrom()
	if originPeer == v.self {
		log.Debug("ignoring indexer message originating from self")
		stats.Record(ctx, metrics.IndexerMessageValidationFailure.M(1))
		return pubsub.ValidationIgnore
	}

	idxrMsg := dtsync.Message{}
	err := idxrMsg.UnmarshalCBOR(bytes.NewBuffer(msg.Data))
	if err != nil {
		log.Errorw("Could not decode indexer pubsub message", "err", err)
		return pubsub.ValidationReject
	}
	if len(idxrMsg.ExtraData) == 0 {
		log.Debugw("ignoring messsage missing miner id", "peer", originPeer)
		return pubsub.ValidationIgnore
	}

	// Get miner info from lotus
	minerAddr, err := address.NewFromBytes(idxrMsg.ExtraData)
	if err != nil {
		log.Warnw("cannot parse extra data as miner address", "err", err, "extraData", idxrMsg.ExtraData)
		return pubsub.ValidationReject
	}

	minerID := minerAddr.String()
	msgCid := idxrMsg.Cid

	var msgInfo *peerMsgInfo
	val, ok := v.peerCache.Get(minerID)
	if !ok {
		msgInfo = &peerMsgInfo{}
	} else {
		msgInfo = val.(*peerMsgInfo)
	}

	// Lock this peer's message info.
	msgInfo.mutex.Lock()
	defer msgInfo.mutex.Unlock()

	if ok {
		// Reject replayed messages.
		seqno := binary.BigEndian.Uint64(msg.Message.GetSeqno())
		if seqno <= msgInfo.lastSeqno {
			log.Debugf("ignoring replayed indexer message")
			return pubsub.ValidationIgnore
		}
		msgInfo.lastSeqno = seqno
	}

	if !ok || originPeer != msgInfo.peerID {
		// Check that the miner ID maps to the peer that sent the message.
		err = v.authenticateMessage(ctx, minerAddr, originPeer)
		if err != nil {
			log.Warnw("cannot authenticate messsage", "err", err, "peer", originPeer, "minerID", minerID)
			stats.Record(ctx, metrics.IndexerMessageValidationFailure.M(1))
			return pubsub.ValidationReject
		}
		msgInfo.peerID = originPeer
		if !ok {
			// Add msgInfo to cache only after being authenticated.  If two
			// messages from the same peer are handled concurrently, there is a
			// small chance that one msgInfo could replace the other here when
			// the info is first cached.  This is OK, so no need to prevent it.
			v.peerCache.Add(minerID, msgInfo)
		}
	}

	// See if message needs to be ignored due to rate limiting.
	if v.rateLimitPeer(msgInfo, msgCid) {
		return pubsub.ValidationIgnore
	}

	stats.Record(ctx, metrics.IndexerMessageValidationSuccess.M(1))
	return pubsub.ValidationAccept
}

func (v *IndexerMessageValidator) rateLimitPeer(msgInfo *peerMsgInfo, msgCid cid.Cid) bool {
	const (
		msgLimit        = 5
		msgTimeLimit    = 10 * time.Second
		repeatTimeLimit = 2 * time.Hour
	)

	timeWindow := msgInfo.rateLimit

	// Check overall message rate.
	if timeWindow == nil {
		timeWindow = ratelimit.NewWindow(msgLimit, msgTimeLimit)
		msgInfo.rateLimit = timeWindow
	} else if msgInfo.lastCid == msgCid {
		// Check if this is a repeat of the previous message data.
		if time.Since(timeWindow.Newest()) < repeatTimeLimit {
			log.Warnw("ignoring repeated indexer message", "sender", msgInfo.peerID)
			return true
		}
	}

	err := timeWindow.Add()
	if err != nil {
		log.Warnw("ignoring indexer message", "sender", msgInfo.peerID, "err", err)
		return true
	}

	msgInfo.lastCid = msgCid

	return false
}

func (v *IndexerMessageValidator) authenticateMessage(ctx context.Context, minerAddress address.Address, peerID peer.ID) error {
	ts, err := v.chainApi.ChainHead(ctx)
	if err != nil {
		return err
	}

	minerInfo, err := v.stateApi.StateMinerInfo(ctx, minerAddress, ts.Key())
	if err != nil {
		return err
	}

	if minerInfo.PeerId == nil {
		return xerrors.New("no peer id for miner")
	}
	if *minerInfo.PeerId != peerID {
		return xerrors.New("miner id does not map to peer that sent message")
	}

	return nil
}