package sub import ( "bytes" "context" "encoding/binary" "sync" "time" address "github.com/filecoin-project/go-address" "github.com/filecoin-project/go-legs/dtsync" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain" "github.com/filecoin-project/lotus/chain/consensus" "github.com/filecoin-project/lotus/chain/messagepool" "github.com/filecoin-project/lotus/chain/store" "github.com/filecoin-project/lotus/chain/sub/ratelimit" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/metrics" "github.com/filecoin-project/lotus/node/impl/client" "github.com/filecoin-project/lotus/node/impl/full" lru "github.com/hashicorp/golang-lru" blocks "github.com/ipfs/go-block-format" bserv "github.com/ipfs/go-blockservice" "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log/v2" connmgr "github.com/libp2p/go-libp2p-core/connmgr" "github.com/libp2p/go-libp2p-core/peer" pubsub "github.com/libp2p/go-libp2p-pubsub" "go.opencensus.io/stats" "go.opencensus.io/tag" "golang.org/x/xerrors" ) var log = logging.Logger("sub") var msgCidPrefix = cid.Prefix{ Version: 1, Codec: cid.DagCBOR, MhType: client.DefaultHashFunction, MhLength: 32, } func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *chain.Syncer, bs bserv.BlockService, cmgr connmgr.ConnManager) { // Timeout after (block time + propagation delay). This is useless at // this point. timeout := time.Duration(build.BlockDelaySecs+build.PropagationDelaySecs) * time.Second for { msg, err := bsub.Next(ctx) if err != nil { if ctx.Err() != nil { log.Warn("quitting HandleIncomingBlocks loop") return } log.Error("error from block subscription: ", err) continue } blk, ok := msg.ValidatorData.(*types.BlockMsg) if !ok { log.Warnf("pubsub block validator passed on wrong type: %#v", msg.ValidatorData) return } src := msg.GetFrom() go func() { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() // NOTE: we could also share a single session between // all requests but that may have other consequences. ses := bserv.NewSession(ctx, bs) start := build.Clock.Now() log.Debug("about to fetch messages for block from pubsub") bmsgs, err := FetchMessagesByCids(ctx, ses, blk.BlsMessages) if err != nil { log.Errorf("failed to fetch all bls messages for block received over pubsub: %s; source: %s", err, src) return } smsgs, err := FetchSignedMessagesByCids(ctx, ses, blk.SecpkMessages) if err != nil { log.Errorf("failed to fetch all secpk messages for block received over pubsub: %s; source: %s", err, src) return } took := build.Clock.Since(start) log.Debugw("new block over pubsub", "cid", blk.Header.Cid(), "source", msg.GetFrom(), "msgfetch", took) if took > 3*time.Second { log.Warnw("Slow msg fetch", "cid", blk.Header.Cid(), "source", msg.GetFrom(), "msgfetch", took) } if delay := build.Clock.Now().Unix() - int64(blk.Header.Timestamp); delay > 5 { _ = stats.RecordWithTags(ctx, []tag.Mutator{tag.Insert(metrics.MinerID, blk.Header.Miner.String())}, metrics.BlockDelay.M(delay), ) log.Warnw("received block with large delay from miner", "block", blk.Cid(), "delay", delay, "miner", blk.Header.Miner) } if s.InformNewBlock(msg.ReceivedFrom, &types.FullBlock{ Header: blk.Header, BlsMessages: bmsgs, SecpkMessages: smsgs, }) { cmgr.TagPeer(msg.ReceivedFrom, "blkprop", 5) } }() } } func FetchMessagesByCids( ctx context.Context, bserv bserv.BlockGetter, cids []cid.Cid, ) ([]*types.Message, error) { out := make([]*types.Message, len(cids)) err := fetchCids(ctx, bserv, cids, func(i int, b blocks.Block) error { msg, err := types.DecodeMessage(b.RawData()) if err != nil { return err } out[i] = msg return nil }) if err != nil { return nil, err } return out, nil } // FIXME: Duplicate of above. func FetchSignedMessagesByCids( ctx context.Context, bserv bserv.BlockGetter, cids []cid.Cid, ) ([]*types.SignedMessage, error) { out := make([]*types.SignedMessage, len(cids)) err := fetchCids(ctx, bserv, cids, func(i int, b blocks.Block) error { smsg, err := types.DecodeSignedMessage(b.RawData()) if err != nil { return err } out[i] = smsg return nil }) if err != nil { return nil, err } return out, nil } // Fetch `cids` from the block service, apply `cb` on each of them. Used // by the fetch message functions above. // We check that each block is received only once and we do not received // blocks we did not request. func fetchCids( ctx context.Context, bserv bserv.BlockGetter, cids []cid.Cid, cb func(int, blocks.Block) error, ) error { ctx, cancel := context.WithCancel(ctx) defer cancel() cidIndex := make(map[cid.Cid]int) for i, c := range cids { if c.Prefix() != msgCidPrefix { return xerrors.Errorf("invalid msg CID: %s", c) } cidIndex[c] = i } if len(cids) != len(cidIndex) { return xerrors.Errorf("duplicate CIDs in fetchCids input") } for block := range bserv.GetBlocks(ctx, cids) { ix, ok := cidIndex[block.Cid()] if !ok { // Ignore duplicate/unexpected blocks. This shouldn't // happen, but we can be safe. log.Errorw("received duplicate/unexpected block when syncing", "cid", block.Cid()) continue } // Record that we've received the block. delete(cidIndex, block.Cid()) if err := cb(ix, block); err != nil { return err } } if len(cidIndex) > 0 { err := ctx.Err() if err == nil { err = xerrors.Errorf("failed to fetch %d messages for unknown reasons", len(cidIndex)) } return err } return nil } type BlockValidator struct { self peer.ID peers *lru.TwoQueueCache killThresh int recvBlocks *blockReceiptCache blacklist func(peer.ID) // necessary for block validation chain *store.ChainStore consensus consensus.Consensus } func NewBlockValidator(self peer.ID, chain *store.ChainStore, cns consensus.Consensus, blacklist func(peer.ID)) *BlockValidator { p, _ := lru.New2Q(4096) return &BlockValidator{ self: self, peers: p, killThresh: 10, blacklist: blacklist, recvBlocks: newBlockReceiptCache(), chain: chain, consensus: cns, } } func (bv *BlockValidator) flagPeer(p peer.ID) { v, ok := bv.peers.Get(p) if !ok { bv.peers.Add(p, int(1)) return } val := v.(int) if val >= bv.killThresh { log.Warnf("blacklisting peer %s", p) bv.blacklist(p) return } bv.peers.Add(p, v.(int)+1) } func (bv *BlockValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) (res pubsub.ValidationResult) { defer func() { if rerr := recover(); rerr != nil { err := xerrors.Errorf("validate block: %s", rerr) recordFailure(ctx, metrics.BlockValidationFailure, err.Error()) bv.flagPeer(pid) res = pubsub.ValidationReject return } }() var what string res, what = bv.consensus.ValidateBlockPubsub(ctx, pid == bv.self, msg) if res == pubsub.ValidationAccept { // it's a good block! make sure we've only seen it once if count := bv.recvBlocks.add(msg.ValidatorData.(*types.BlockMsg).Cid()); count > 0 { if pid == bv.self { log.Warnf("local block has been seen %d times; ignoring", count) } // TODO: once these changes propagate to the network, we can consider // dropping peers who send us the same block multiple times return pubsub.ValidationIgnore } } else { recordFailure(ctx, metrics.BlockValidationFailure, what) } return res } type blockReceiptCache struct { blocks *lru.TwoQueueCache } func newBlockReceiptCache() *blockReceiptCache { c, _ := lru.New2Q(8192) return &blockReceiptCache{ blocks: c, } } func (brc *blockReceiptCache) add(bcid cid.Cid) int { val, ok := brc.blocks.Get(bcid) if !ok { brc.blocks.Add(bcid, int(1)) return 0 } brc.blocks.Add(bcid, val.(int)+1) return val.(int) } type MessageValidator struct { self peer.ID mpool *messagepool.MessagePool } func NewMessageValidator(self peer.ID, mp *messagepool.MessagePool) *MessageValidator { return &MessageValidator{self: self, mpool: mp} } func (mv *MessageValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) pubsub.ValidationResult { if pid == mv.self { return mv.validateLocalMessage(ctx, msg) } start := time.Now() defer func() { ms := time.Now().Sub(start).Microseconds() stats.Record(ctx, metrics.MessageValidationDuration.M(float64(ms)/1000)) }() stats.Record(ctx, metrics.MessageReceived.M(1)) m, err := types.DecodeSignedMessage(msg.Message.GetData()) if err != nil { log.Warnf("failed to decode incoming message: %s", err) ctx, _ = tag.New(ctx, tag.Insert(metrics.FailureType, "decode")) stats.Record(ctx, metrics.MessageValidationFailure.M(1)) return pubsub.ValidationReject } if err := mv.mpool.Add(ctx, m); err != nil { log.Debugf("failed to add message from network to message pool (From: %s, To: %s, Nonce: %d, Value: %s): %s", m.Message.From, m.Message.To, m.Message.Nonce, types.FIL(m.Message.Value), err) ctx, _ = tag.New( ctx, tag.Upsert(metrics.Local, "false"), ) recordFailure(ctx, metrics.MessageValidationFailure, "add") switch { case xerrors.Is(err, messagepool.ErrSoftValidationFailure): fallthrough case xerrors.Is(err, messagepool.ErrRBFTooLowPremium): fallthrough case xerrors.Is(err, messagepool.ErrTooManyPendingMessages): fallthrough case xerrors.Is(err, messagepool.ErrNonceGap): fallthrough case xerrors.Is(err, messagepool.ErrNonceTooLow): return pubsub.ValidationIgnore default: return pubsub.ValidationReject } } ctx, _ = tag.New( ctx, tag.Upsert(metrics.MsgValid, "true"), ) stats.Record(ctx, metrics.MessageValidationSuccess.M(1)) return pubsub.ValidationAccept } func (mv *MessageValidator) validateLocalMessage(ctx context.Context, msg *pubsub.Message) pubsub.ValidationResult { ctx, _ = tag.New( ctx, tag.Upsert(metrics.Local, "true"), ) start := time.Now() defer func() { ms := time.Now().Sub(start).Microseconds() stats.Record(ctx, metrics.MessageValidationDuration.M(float64(ms)/1000)) }() // do some lightweight validation stats.Record(ctx, metrics.MessagePublished.M(1)) m, err := types.DecodeSignedMessage(msg.Message.GetData()) if err != nil { log.Warnf("failed to decode local message: %s", err) recordFailure(ctx, metrics.MessageValidationFailure, "decode") return pubsub.ValidationIgnore } if m.Size() > messagepool.MaxMessageSize { log.Warnf("local message is too large! (%dB)", m.Size()) recordFailure(ctx, metrics.MessageValidationFailure, "oversize") return pubsub.ValidationIgnore } if m.Message.To == address.Undef { log.Warn("local message has invalid destination address") recordFailure(ctx, metrics.MessageValidationFailure, "undef-addr") return pubsub.ValidationIgnore } if !m.Message.Value.LessThan(types.TotalFilecoinInt) { log.Warnf("local messages has too high value: %s", m.Message.Value) recordFailure(ctx, metrics.MessageValidationFailure, "value-too-high") return pubsub.ValidationIgnore } if err := mv.mpool.VerifyMsgSig(m); err != nil { log.Warnf("signature verification failed for local message: %s", err) recordFailure(ctx, metrics.MessageValidationFailure, "verify-sig") return pubsub.ValidationIgnore } ctx, _ = tag.New( ctx, tag.Upsert(metrics.MsgValid, "true"), ) stats.Record(ctx, metrics.MessageValidationSuccess.M(1)) return pubsub.ValidationAccept } func HandleIncomingMessages(ctx context.Context, mpool *messagepool.MessagePool, msub *pubsub.Subscription) { for { _, err := msub.Next(ctx) if err != nil { log.Warn("error from message subscription: ", err) if ctx.Err() != nil { log.Warn("quitting HandleIncomingMessages loop") return } continue } // Do nothing... everything happens in validate } } func recordFailure(ctx context.Context, metric *stats.Int64Measure, failureType string) { ctx, _ = tag.New( ctx, tag.Upsert(metrics.FailureType, failureType), ) stats.Record(ctx, metric.M(1)) } type peerMsgInfo struct { peerID peer.ID lastCid cid.Cid lastSeqno uint64 rateLimit *ratelimit.Window mutex sync.Mutex } type IndexerMessageValidator struct { self peer.ID peerCache *lru.TwoQueueCache chainApi full.ChainModuleAPI stateApi full.StateModuleAPI } func NewIndexerMessageValidator(self peer.ID, chainApi full.ChainModuleAPI, stateApi full.StateModuleAPI) *IndexerMessageValidator { peerCache, _ := lru.New2Q(8192) return &IndexerMessageValidator{ self: self, peerCache: peerCache, chainApi: chainApi, stateApi: stateApi, } } func (v *IndexerMessageValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) pubsub.ValidationResult { // This chain-node should not be publishing its own messages. These are // relayed from market-nodes. If a node appears to be local, reject it. if pid == v.self { log.Warnf("refusing to relay indexer message from self") stats.Record(ctx, metrics.IndexerMessageValidationFailure.M(1)) return pubsub.ValidationReject } originPeer := msg.GetFrom() if originPeer == v.self { log.Warnf("refusing to relay indexer message originating from self") stats.Record(ctx, metrics.IndexerMessageValidationFailure.M(1)) return pubsub.ValidationReject } idxrMsg := dtsync.Message{} err := idxrMsg.UnmarshalCBOR(bytes.NewBuffer(msg.Data)) if err != nil { log.Errorw("Could not decode indexer pubsub message", "err", err) return pubsub.ValidationReject } if len(idxrMsg.ExtraData) == 0 { log.Debugw("ignoring messsage missing miner id", "peer", originPeer) return pubsub.ValidationIgnore } minerID := string(idxrMsg.ExtraData) msgCid := idxrMsg.Cid var msgInfo *peerMsgInfo val, ok := v.peerCache.Get(minerID) if !ok { msgInfo = &peerMsgInfo{} } else { msgInfo = val.(*peerMsgInfo) } // Lock this peer's message info. msgInfo.mutex.Lock() defer msgInfo.mutex.Unlock() if ok { // Reject replayed messages. seqno := binary.BigEndian.Uint64(msg.Message.GetSeqno()) if seqno <= msgInfo.lastSeqno { log.Debugf("ignoring replayed indexer message") return pubsub.ValidationIgnore } msgInfo.lastSeqno = seqno } if !ok || originPeer != msgInfo.peerID { // Check that the miner ID maps to the peer that sent the message. err = v.authenticateMessage(ctx, minerID, originPeer) if err != nil { log.Warnw("cannot authenticate messsage", "err", err, "peer", originPeer, "minerID", minerID) stats.Record(ctx, metrics.IndexerMessageValidationFailure.M(1)) return pubsub.ValidationReject } msgInfo.peerID = originPeer if !ok { // Add msgInfo to cache only after being authenticated. If two // messages from the same peer are handled concurrently, there is a // small chance that one msgInfo could replace the other here when // the info is first cached. This is OK, so no need to prevent it. v.peerCache.Add(minerID, msgInfo) } } // See if message needs to be ignored due to rate limiting. if v.rateLimitPeer(msgInfo, msgCid) { return pubsub.ValidationIgnore } stats.Record(ctx, metrics.IndexerMessageValidationSuccess.M(1)) return pubsub.ValidationAccept } func (v *IndexerMessageValidator) rateLimitPeer(msgInfo *peerMsgInfo, msgCid cid.Cid) bool { const ( msgLimit = 5 msgTimeLimit = 10 * time.Second repeatTimeLimit = 2 * time.Hour ) timeWindow := msgInfo.rateLimit // Check overall message rate. if timeWindow == nil { timeWindow = ratelimit.NewWindow(msgLimit, msgTimeLimit) msgInfo.rateLimit = timeWindow } else if msgInfo.lastCid == msgCid { // Check if this is a repeat of the previous message data. if time.Since(timeWindow.Newest()) < repeatTimeLimit { log.Warnw("ignoring repeated indexer message", "sender", msgInfo.peerID) return true } } err := timeWindow.Add() if err != nil { log.Warnw("ignoring indexer message", "sender", msgInfo.peerID, "err", err) return true } msgInfo.lastCid = msgCid return false } func (v *IndexerMessageValidator) authenticateMessage(ctx context.Context, minerID string, peerID peer.ID) error { // Get miner info from lotus minerAddress, err := address.NewFromString(minerID) if err != nil { return xerrors.Errorf("invalid miner id: %w", err) } ts, err := v.chainApi.ChainHead(ctx) if err != nil { return err } minerInfo, err := v.stateApi.StateMinerInfo(ctx, minerAddress, ts.Key()) if err != nil { return err } if minerInfo.PeerId == nil { return xerrors.New("no peer id for miner") } if *minerInfo.PeerId != peerID { return xerrors.New("miner id does not map to peer that sent message") } return nil }