first pass, ready for review

This commit is contained in:
Lucas Molas 2020-07-27 12:31:36 -03:00
parent 43491cb7ed
commit 76d40ec408
14 changed files with 1179 additions and 1045 deletions

View File

@ -6,5 +6,5 @@ import "github.com/raulk/clock"
// we use a real-time clock, which maps to the `time` package.
//
// Tests that need control of time can replace this variable with
// clock.NewMock().
// clock.NewMock(). Always use real time for socket/stream deadlines.
var Clock = clock.New()

View File

@ -1,276 +0,0 @@
package blocksync
import (
"bufio"
"context"
"time"
"github.com/libp2p/go-libp2p-core/protocol"
"go.opencensus.io/trace"
"golang.org/x/xerrors"
cborutil "github.com/filecoin-project/go-cbor-util"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
"github.com/ipfs/go-cid"
logging "github.com/ipfs/go-log/v2"
inet "github.com/libp2p/go-libp2p-core/network"
"github.com/libp2p/go-libp2p-core/peer"
)
var log = logging.Logger("blocksync")
type NewStreamFunc func(context.Context, peer.ID, ...protocol.ID) (inet.Stream, error)
const BlockSyncProtocolID = "/fil/sync/blk/0.0.1"
const BlockSyncMaxRequestLength = 800
// BlockSyncService is the component that services BlockSync requests from
// peers.
//
// BlockSync is the basic chain synchronization protocol of Filecoin. BlockSync
// is an RPC-oriented protocol, with a single operation to request blocks.
//
// A request contains a start anchor block (referred to with a CID), and a
// amount of blocks requested beyond the anchor (including the anchor itself).
//
// A client can also pass options, encoded as a 64-bit bitfield. Lotus supports
// two options at the moment:
//
// - include block contents
// - include block messages
//
// The response will include a status code, an optional message, and the
// response payload in case of success. The payload is a slice of serialized
// tipsets.
type BlockSyncService struct {
cs *store.ChainStore
}
type BlockSyncRequest struct {
Start []cid.Cid
RequestLength uint64
Options uint64
}
type BSOptions struct {
IncludeBlocks bool
IncludeMessages bool
}
func ParseBSOptions(optfield uint64) *BSOptions {
return &BSOptions{
IncludeBlocks: optfield&(BSOptBlocks) != 0,
IncludeMessages: optfield&(BSOptMessages) != 0,
}
}
const (
BSOptBlocks = 1 << iota
BSOptMessages
)
const (
StatusOK = uint64(0)
StatusPartial = uint64(101)
StatusNotFound = uint64(201)
StatusGoAway = uint64(202)
StatusInternalError = uint64(203)
StatusBadRequest = uint64(204)
)
type BlockSyncResponse struct {
Chain []*BSTipSet
Status uint64
Message string
}
type BSTipSet struct {
Blocks []*types.BlockHeader
BlsMessages []*types.Message
BlsMsgIncludes [][]uint64
SecpkMessages []*types.SignedMessage
SecpkMsgIncludes [][]uint64
}
func NewBlockSyncService(cs *store.ChainStore) *BlockSyncService {
return &BlockSyncService{
cs: cs,
}
}
func (bss *BlockSyncService) HandleStream(s inet.Stream) {
ctx, span := trace.StartSpan(context.Background(), "blocksync.HandleStream")
defer span.End()
defer s.Close() //nolint:errcheck
var req BlockSyncRequest
if err := cborutil.ReadCborRPC(bufio.NewReader(s), &req); err != nil {
log.Warnf("failed to read block sync request: %s", err)
return
}
log.Infow("block sync request", "start", req.Start, "len", req.RequestLength)
resp, err := bss.processRequest(ctx, s.Conn().RemotePeer(), &req)
if err != nil {
log.Warn("failed to process block sync request: ", err)
return
}
writeDeadline := 60 * time.Second
_ = s.SetDeadline(time.Now().Add(writeDeadline)) // always use real time for socket/stream deadlines.
if err := cborutil.WriteCborRPC(s, resp); err != nil {
log.Warnw("failed to write back response for handle stream", "err", err, "peer", s.Conn().RemotePeer())
return
}
}
func (bss *BlockSyncService) processRequest(ctx context.Context, p peer.ID, req *BlockSyncRequest) (*BlockSyncResponse, error) {
_, span := trace.StartSpan(ctx, "blocksync.ProcessRequest")
defer span.End()
opts := ParseBSOptions(req.Options)
if len(req.Start) == 0 {
return &BlockSyncResponse{
Status: StatusBadRequest,
Message: "no cids given in blocksync request",
}, nil
}
span.AddAttributes(
trace.BoolAttribute("blocks", opts.IncludeBlocks),
trace.BoolAttribute("messages", opts.IncludeMessages),
trace.Int64Attribute("reqlen", int64(req.RequestLength)),
)
reqlen := req.RequestLength
if reqlen > BlockSyncMaxRequestLength {
log.Warnw("limiting blocksync request length", "orig", req.RequestLength, "peer", p)
reqlen = BlockSyncMaxRequestLength
}
chain, err := collectChainSegment(bss.cs, types.NewTipSetKey(req.Start...), reqlen, opts)
if err != nil {
log.Warn("encountered error while responding to block sync request: ", err)
return &BlockSyncResponse{
Status: StatusInternalError,
Message: err.Error(),
}, nil
}
status := StatusOK
if reqlen < req.RequestLength {
status = StatusPartial
}
return &BlockSyncResponse{
Chain: chain,
Status: status,
}, nil
}
func collectChainSegment(cs *store.ChainStore, start types.TipSetKey, length uint64, opts *BSOptions) ([]*BSTipSet, error) {
var bstips []*BSTipSet
cur := start
for {
var bst BSTipSet
ts, err := cs.LoadTipSet(cur)
if err != nil {
return nil, xerrors.Errorf("failed loading tipset %s: %w", cur, err)
}
if opts.IncludeMessages {
bmsgs, bmincl, smsgs, smincl, err := gatherMessages(cs, ts)
if err != nil {
return nil, xerrors.Errorf("gather messages failed: %w", err)
}
bst.BlsMessages = bmsgs
bst.BlsMsgIncludes = bmincl
bst.SecpkMessages = smsgs
bst.SecpkMsgIncludes = smincl
}
if opts.IncludeBlocks {
bst.Blocks = ts.Blocks()
}
bstips = append(bstips, &bst)
if uint64(len(bstips)) >= length || ts.Height() == 0 {
return bstips, nil
}
cur = ts.Parents()
}
}
func gatherMessages(cs *store.ChainStore, ts *types.TipSet) ([]*types.Message, [][]uint64, []*types.SignedMessage, [][]uint64, error) {
blsmsgmap := make(map[cid.Cid]uint64)
secpkmsgmap := make(map[cid.Cid]uint64)
var secpkmsgs []*types.SignedMessage
var blsmsgs []*types.Message
var secpkincl, blsincl [][]uint64
for _, b := range ts.Blocks() {
bmsgs, smsgs, err := cs.MessagesForBlock(b)
if err != nil {
return nil, nil, nil, nil, err
}
bmi := make([]uint64, 0, len(bmsgs))
for _, m := range bmsgs {
i, ok := blsmsgmap[m.Cid()]
if !ok {
i = uint64(len(blsmsgs))
blsmsgs = append(blsmsgs, m)
blsmsgmap[m.Cid()] = i
}
bmi = append(bmi, i)
}
blsincl = append(blsincl, bmi)
smi := make([]uint64, 0, len(smsgs))
for _, m := range smsgs {
i, ok := secpkmsgmap[m.Cid()]
if !ok {
i = uint64(len(secpkmsgs))
secpkmsgs = append(secpkmsgs, m)
secpkmsgmap[m.Cid()] = i
}
smi = append(smi, i)
}
secpkincl = append(secpkincl, smi)
}
return blsmsgs, blsincl, secpkmsgs, secpkincl, nil
}
func bstsToFullTipSet(bts *BSTipSet) (*store.FullTipSet, error) {
fts := &store.FullTipSet{}
for i, b := range bts.Blocks {
fb := &types.FullBlock{
Header: b,
}
for _, mi := range bts.BlsMsgIncludes[i] {
fb.BlsMessages = append(fb.BlsMessages, bts.BlsMessages[mi])
}
for _, mi := range bts.SecpkMsgIncludes[i] {
fb.SecpkMessages = append(fb.SecpkMessages, bts.SecpkMessages[mi])
}
fts.Blocks = append(fts.Blocks, fb)
}
return fts, nil
}

View File

@ -1,603 +0,0 @@
package blocksync
import (
"bufio"
"context"
"fmt"
"math/rand"
"sort"
"sync"
"time"
blocks "github.com/ipfs/go-block-format"
bserv "github.com/ipfs/go-blockservice"
"github.com/ipfs/go-cid"
graphsync "github.com/ipfs/go-graphsync"
gsnet "github.com/ipfs/go-graphsync/network"
host "github.com/libp2p/go-libp2p-core/host"
inet "github.com/libp2p/go-libp2p-core/network"
"github.com/libp2p/go-libp2p-core/peer"
"go.opencensus.io/trace"
"golang.org/x/xerrors"
cborutil "github.com/filecoin-project/go-cbor-util"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
incrt "github.com/filecoin-project/lotus/lib/increadtimeout"
"github.com/filecoin-project/lotus/lib/peermgr"
"github.com/filecoin-project/lotus/node/modules/dtypes"
)
type BlockSync struct {
bserv bserv.BlockService
gsync graphsync.GraphExchange
host host.Host
syncPeers *bsPeerTracker
peerMgr *peermgr.PeerMgr
}
func NewBlockSyncClient(bserv dtypes.ChainBlockService, h host.Host, pmgr peermgr.MaybePeerMgr, gs dtypes.Graphsync) *BlockSync {
return &BlockSync{
bserv: bserv,
host: h,
syncPeers: newPeerTracker(pmgr.Mgr),
peerMgr: pmgr.Mgr,
gsync: gs,
}
}
func (bs *BlockSync) processStatus(req *BlockSyncRequest, res *BlockSyncResponse) error {
switch res.Status {
case StatusPartial: // Partial Response
return xerrors.Errorf("not handling partial blocksync responses yet")
case StatusNotFound: // req.Start not found
return xerrors.Errorf("not found")
case StatusGoAway: // Go Away
return xerrors.Errorf("not handling 'go away' blocksync responses yet")
case StatusInternalError: // Internal Error
return xerrors.Errorf("block sync peer errored: %s", res.Message)
case StatusBadRequest:
return xerrors.Errorf("block sync request invalid: %s", res.Message)
default:
return xerrors.Errorf("unrecognized response code: %d", res.Status)
}
}
// GetBlocks fetches count blocks from the network, from the provided tipset
// *backwards*, returning as many tipsets as count.
//
// {hint/usage}: This is used by the Syncer during normal chain syncing and when
// resolving forks.
func (bs *BlockSync) GetBlocks(ctx context.Context, tsk types.TipSetKey, count int) ([]*types.TipSet, error) {
ctx, span := trace.StartSpan(ctx, "bsync.GetBlocks")
defer span.End()
if span.IsRecordingEvents() {
span.AddAttributes(
trace.StringAttribute("tipset", fmt.Sprint(tsk.Cids())),
trace.Int64Attribute("count", int64(count)),
)
}
req := &BlockSyncRequest{
Start: tsk.Cids(),
RequestLength: uint64(count),
Options: BSOptBlocks,
}
// this peerset is sorted by latency and failure counting.
peers := bs.getPeers()
// randomize the first few peers so we don't always pick the same peer
shufflePrefix(peers)
start := build.Clock.Now()
var oerr error
for _, p := range peers {
// TODO: doing this synchronously isnt great, but fetching in parallel
// may not be a good idea either. think about this more
select {
case <-ctx.Done():
return nil, xerrors.Errorf("blocksync getblocks failed: %w", ctx.Err())
default:
}
res, err := bs.sendRequestToPeer(ctx, p, req)
if err != nil {
oerr = err
if !xerrors.Is(err, inet.ErrNoConn) {
log.Warnf("BlockSync request failed for peer %s: %s", p.String(), err)
}
continue
}
if res.Status == StatusOK || res.Status == StatusPartial {
resp, err := bs.processBlocksResponse(req, res)
if err != nil {
return nil, xerrors.Errorf("success response from peer failed to process: %w", err)
}
bs.syncPeers.logGlobalSuccess(build.Clock.Since(start))
bs.host.ConnManager().TagPeer(p, "bsync", 25)
return resp, nil
}
oerr = bs.processStatus(req, res)
if oerr != nil {
log.Warnf("BlockSync peer %s response was an error: %s", p.String(), oerr)
}
}
return nil, xerrors.Errorf("GetBlocks failed with all peers: %w", oerr)
}
func (bs *BlockSync) GetFullTipSet(ctx context.Context, p peer.ID, tsk types.TipSetKey) (*store.FullTipSet, error) {
// TODO: round robin through these peers on error
req := &BlockSyncRequest{
Start: tsk.Cids(),
RequestLength: 1,
Options: BSOptBlocks | BSOptMessages,
}
res, err := bs.sendRequestToPeer(ctx, p, req)
if err != nil {
return nil, err
}
switch res.Status {
case 0: // Success
if len(res.Chain) == 0 {
return nil, fmt.Errorf("got zero length chain response")
}
bts := res.Chain[0]
return bstsToFullTipSet(bts)
case 101: // Partial Response
return nil, xerrors.Errorf("partial responses are not handled for single tipset fetching")
case 201: // req.Start not found
return nil, fmt.Errorf("not found")
case 202: // Go Away
return nil, xerrors.Errorf("received 'go away' response peer")
case 203: // Internal Error
return nil, fmt.Errorf("block sync peer errored: %q", res.Message)
case 204: // Invalid Request
return nil, fmt.Errorf("block sync request invalid: %q", res.Message)
default:
return nil, fmt.Errorf("unrecognized response code")
}
}
func shufflePrefix(peers []peer.ID) {
pref := 5
if len(peers) < pref {
pref = len(peers)
}
buf := make([]peer.ID, pref)
perm := rand.Perm(pref)
for i, v := range perm {
buf[i] = peers[v]
}
copy(peers, buf)
}
func (bs *BlockSync) GetChainMessages(ctx context.Context, h *types.TipSet, count uint64) ([]*BSTipSet, error) {
ctx, span := trace.StartSpan(ctx, "GetChainMessages")
defer span.End()
peers := bs.getPeers()
// randomize the first few peers so we don't always pick the same peer
shufflePrefix(peers)
req := &BlockSyncRequest{
Start: h.Cids(),
RequestLength: count,
Options: BSOptMessages,
}
var err error
start := build.Clock.Now()
for _, p := range peers {
res, rerr := bs.sendRequestToPeer(ctx, p, req)
if rerr != nil {
err = rerr
log.Warnf("BlockSync request failed for peer %s: %s", p.String(), err)
continue
}
if res.Status == StatusOK {
bs.syncPeers.logGlobalSuccess(build.Clock.Since(start))
return res.Chain, nil
}
if res.Status == StatusPartial {
// TODO: track partial response sizes to ensure we don't overrequest too often
return res.Chain, nil
}
err = bs.processStatus(req, res)
if err != nil {
log.Warnf("BlockSync peer %s response was an error: %s", p.String(), err)
}
}
if err == nil {
return nil, xerrors.Errorf("GetChainMessages failed, no peers connected")
}
// TODO: What if we have no peers (and err is nil)?
return nil, xerrors.Errorf("GetChainMessages failed with all peers(%d): %w", len(peers), err)
}
func (bs *BlockSync) sendRequestToPeer(ctx context.Context, p peer.ID, req *BlockSyncRequest) (_ *BlockSyncResponse, err error) {
ctx, span := trace.StartSpan(ctx, "sendRequestToPeer")
defer span.End()
defer func() {
if err != nil {
if span.IsRecordingEvents() {
span.SetStatus(trace.Status{
Code: 5,
Message: err.Error(),
})
}
}
}()
if span.IsRecordingEvents() {
span.AddAttributes(
trace.StringAttribute("peer", p.Pretty()),
)
}
gsproto := string(gsnet.ProtocolGraphsync)
supp, err := bs.host.Peerstore().SupportsProtocols(p, BlockSyncProtocolID, gsproto)
if err != nil {
return nil, xerrors.Errorf("failed to get protocols for peer: %w", err)
}
if len(supp) == 0 {
return nil, xerrors.Errorf("peer %s supports no known sync protocols", p)
}
switch supp[0] {
case BlockSyncProtocolID:
res, err := bs.fetchBlocksBlockSync(ctx, p, req)
if err != nil {
return nil, xerrors.Errorf("blocksync req failed: %w", err)
}
return res, nil
case gsproto:
res, err := bs.fetchBlocksGraphSync(ctx, p, req)
if err != nil {
return nil, xerrors.Errorf("graphsync req failed: %w", err)
}
return res, nil
default:
return nil, xerrors.Errorf("peerstore somehow returned unexpected protocols: %v", supp)
}
}
func (bs *BlockSync) fetchBlocksBlockSync(ctx context.Context, p peer.ID, req *BlockSyncRequest) (*BlockSyncResponse, error) {
ctx, span := trace.StartSpan(ctx, "blockSyncFetch")
defer span.End()
start := build.Clock.Now()
s, err := bs.host.NewStream(inet.WithNoDial(ctx, "should already have connection"), p, BlockSyncProtocolID)
if err != nil {
bs.RemovePeer(p)
return nil, xerrors.Errorf("failed to open stream to peer: %w", err)
}
_ = s.SetWriteDeadline(time.Now().Add(5 * time.Second)) // always use real time for socket/stream deadlines.
if err := cborutil.WriteCborRPC(s, req); err != nil {
_ = s.SetWriteDeadline(time.Time{})
bs.syncPeers.logFailure(p, build.Clock.Since(start))
return nil, err
}
_ = s.SetWriteDeadline(time.Time{})
var res BlockSyncResponse
r := incrt.New(s, 50<<10, 5*time.Second)
if err := cborutil.ReadCborRPC(bufio.NewReader(r), &res); err != nil {
bs.syncPeers.logFailure(p, build.Clock.Since(start))
return nil, err
}
if span.IsRecordingEvents() {
span.AddAttributes(
trace.Int64Attribute("resp_status", int64(res.Status)),
trace.StringAttribute("msg", res.Message),
trace.Int64Attribute("chain_len", int64(len(res.Chain))),
)
}
bs.syncPeers.logSuccess(p, build.Clock.Since(start))
return &res, nil
}
func (bs *BlockSync) processBlocksResponse(req *BlockSyncRequest, res *BlockSyncResponse) ([]*types.TipSet, error) {
if len(res.Chain) == 0 {
return nil, xerrors.Errorf("got no blocks in successful blocksync response")
}
cur, err := types.NewTipSet(res.Chain[0].Blocks)
if err != nil {
return nil, err
}
out := []*types.TipSet{cur}
for bi := 1; bi < len(res.Chain); bi++ {
next := res.Chain[bi].Blocks
nts, err := types.NewTipSet(next)
if err != nil {
return nil, err
}
if !types.CidArrsEqual(cur.Parents().Cids(), nts.Cids()) {
return nil, fmt.Errorf("parents of tipset[%d] were not tipset[%d]", bi-1, bi)
}
out = append(out, nts)
cur = nts
}
return out, nil
}
func (bs *BlockSync) GetBlock(ctx context.Context, c cid.Cid) (*types.BlockHeader, error) {
sb, err := bs.bserv.GetBlock(ctx, c)
if err != nil {
return nil, err
}
return types.DecodeBlock(sb.RawData())
}
func (bs *BlockSync) AddPeer(p peer.ID) {
bs.syncPeers.addPeer(p)
}
func (bs *BlockSync) RemovePeer(p peer.ID) {
bs.syncPeers.removePeer(p)
}
// getPeers returns a preference-sorted set of peers to query.
func (bs *BlockSync) getPeers() []peer.ID {
return bs.syncPeers.prefSortedPeers()
}
func (bs *BlockSync) FetchMessagesByCids(ctx context.Context, cids []cid.Cid) ([]*types.Message, error) {
out := make([]*types.Message, len(cids))
err := bs.fetchCids(ctx, cids, func(i int, b blocks.Block) error {
msg, err := types.DecodeMessage(b.RawData())
if err != nil {
return err
}
if out[i] != nil {
return fmt.Errorf("received duplicate message")
}
out[i] = msg
return nil
})
if err != nil {
return nil, err
}
return out, nil
}
func (bs *BlockSync) FetchSignedMessagesByCids(ctx context.Context, cids []cid.Cid) ([]*types.SignedMessage, error) {
out := make([]*types.SignedMessage, len(cids))
err := bs.fetchCids(ctx, cids, func(i int, b blocks.Block) error {
smsg, err := types.DecodeSignedMessage(b.RawData())
if err != nil {
return err
}
if out[i] != nil {
return fmt.Errorf("received duplicate message")
}
out[i] = smsg
return nil
})
if err != nil {
return nil, err
}
return out, nil
}
func (bs *BlockSync) fetchCids(ctx context.Context, cids []cid.Cid, cb func(int, blocks.Block) error) error {
resp := bs.bserv.GetBlocks(context.TODO(), cids)
m := make(map[cid.Cid]int)
for i, c := range cids {
m[c] = i
}
for i := 0; i < len(cids); i++ {
select {
case v, ok := <-resp:
if !ok {
if i == len(cids)-1 {
break
}
return fmt.Errorf("failed to fetch all messages")
}
ix, ok := m[v.Cid()]
if !ok {
return fmt.Errorf("received message we didnt ask for")
}
if err := cb(ix, v); err != nil {
return err
}
}
}
return nil
}
type peerStats struct {
successes int
failures int
firstSeen time.Time
averageTime time.Duration
}
type bsPeerTracker struct {
lk sync.Mutex
peers map[peer.ID]*peerStats
avgGlobalTime time.Duration
pmgr *peermgr.PeerMgr
}
func newPeerTracker(pmgr *peermgr.PeerMgr) *bsPeerTracker {
return &bsPeerTracker{
peers: make(map[peer.ID]*peerStats),
pmgr: pmgr,
}
}
func (bpt *bsPeerTracker) addPeer(p peer.ID) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
if _, ok := bpt.peers[p]; ok {
return
}
bpt.peers[p] = &peerStats{
firstSeen: build.Clock.Now(),
}
}
const (
// newPeerMul is how much better than average is the new peer assumed to be
// less than one to encourouge trying new peers
newPeerMul = 0.9
)
func (bpt *bsPeerTracker) prefSortedPeers() []peer.ID {
// TODO: this could probably be cached, but as long as its not too many peers, fine for now
bpt.lk.Lock()
defer bpt.lk.Unlock()
out := make([]peer.ID, 0, len(bpt.peers))
for p := range bpt.peers {
out = append(out, p)
}
// sort by 'expected cost' of requesting data from that peer
// additionally handle edge cases where not enough data is available
sort.Slice(out, func(i, j int) bool {
pi := bpt.peers[out[i]]
pj := bpt.peers[out[j]]
var costI, costJ float64
getPeerInitLat := func(p peer.ID) float64 {
var res float64
if bpt.pmgr != nil {
if lat, ok := bpt.pmgr.GetPeerLatency(p); ok {
res = float64(lat)
}
}
if res == 0 {
res = float64(bpt.avgGlobalTime)
}
return res * newPeerMul
}
if pi.successes+pi.failures > 0 {
failRateI := float64(pi.failures) / float64(pi.failures+pi.successes)
costI = float64(pi.averageTime) + failRateI*float64(bpt.avgGlobalTime)
} else {
costI = getPeerInitLat(out[i])
}
if pj.successes+pj.failures > 0 {
failRateJ := float64(pj.failures) / float64(pj.failures+pj.successes)
costJ = float64(pj.averageTime) + failRateJ*float64(bpt.avgGlobalTime)
} else {
costJ = getPeerInitLat(out[j])
}
return costI < costJ
})
return out
}
const (
// xInvAlpha = (N+1)/2
localInvAlpha = 5 // 86% of the value is the last 9
globalInvAlpha = 20 // 86% of the value is the last 39
)
func (bpt *bsPeerTracker) logGlobalSuccess(dur time.Duration) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
if bpt.avgGlobalTime == 0 {
bpt.avgGlobalTime = dur
return
}
delta := (dur - bpt.avgGlobalTime) / globalInvAlpha
bpt.avgGlobalTime += delta
}
func logTime(pi *peerStats, dur time.Duration) {
if pi.averageTime == 0 {
pi.averageTime = dur
return
}
delta := (dur - pi.averageTime) / localInvAlpha
pi.averageTime += delta
}
func (bpt *bsPeerTracker) logSuccess(p peer.ID, dur time.Duration) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
var pi *peerStats
var ok bool
if pi, ok = bpt.peers[p]; !ok {
log.Warnw("log success called on peer not in tracker", "peerid", p.String())
return
}
pi.successes++
logTime(pi, dur)
}
func (bpt *bsPeerTracker) logFailure(p peer.ID, dur time.Duration) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
var pi *peerStats
var ok bool
if pi, ok = bpt.peers[p]; !ok {
log.Warn("log failure called on peer not in tracker", "peerid", p.String())
return
}
pi.failures++
logTime(pi, dur)
}
func (bpt *bsPeerTracker) removePeer(p peer.ID) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
delete(bpt.peers, p)
}

446
chain/blocksync/client.go Normal file
View File

@ -0,0 +1,446 @@
package blocksync
import (
"bufio"
"context"
"fmt"
"math/rand"
"time"
host "github.com/libp2p/go-libp2p-core/host"
inet "github.com/libp2p/go-libp2p-core/network"
"github.com/libp2p/go-libp2p-core/peer"
"go.opencensus.io/trace"
"golang.org/x/xerrors"
cborutil "github.com/filecoin-project/go-cbor-util"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
incrt "github.com/filecoin-project/lotus/lib/increadtimeout"
"github.com/filecoin-project/lotus/lib/peermgr"
)
// Protocol client.
// FIXME: Rename to just `Client`. Not done at the moment to avoid
// disrupt too much of the consumer code, should be done along
// https://github.com/filecoin-project/lotus/issues/2612.
type BlockSync struct {
// Connection manager used to contact the server.
// FIXME: We should have a reduced interface here, initialized
// just with our protocol ID, we shouldn't be able to open *any*
// connection.
host host.Host
peerTracker *bsPeerTracker
}
func NewClient(
host host.Host,
pmgr peermgr.MaybePeerMgr,
) *BlockSync {
return &BlockSync{
host: host,
peerTracker: newPeerTracker(pmgr.Mgr),
}
}
// Main logic of the client request service. The provided `Request`
// is sent to the `singlePeer` if one is indicated or to all available
// ones otherwise. The response is processed and validated according
// to the `Request` options. Either a `ValidatedResponse` is returned
// (which can be safely accessed), or an `error` that may represent
// either a response error status, a failed validation or an internal
// error.
//
// This is the internal single-point-of-entry for all external-facing
// APIs, currently we have 3 very heterogeneous services exposed:
// * GetBlocks: Headers
// * GetFullTipSet: Headers | Messages
// * GetChainMessages: Messages
// This function handles all the different combinations of the available
// request options without disrupting external calls. In the future the
// consumers should be forced to use a more standardized service and
// adhere to a single API derived from this function.
func (client *BlockSync) doRequest(
ctx context.Context,
req *Request,
singlePeer *peer.ID,
) (*ValidatedResponse, error) {
// Validate request.
if req.Length == 0 {
return nil, xerrors.Errorf("invalid request of length 0")
}
if req.Length > MaxRequestLength {
return nil, xerrors.Errorf("request length (%d) above maximum (%d)",
req.Length, MaxRequestLength)
}
if req.Options == 0 {
return nil, xerrors.Errorf("request with no options set")
}
// Generate the list of peers to be queried, either the
// `singlePeer` indicated or all peers available (sorted
// by an internal peer tracker with some randomness injected).
var peers []peer.ID
if singlePeer != nil {
peers = []peer.ID{*singlePeer}
} else {
peers = client.getShuffledPeers()
if len(peers) == 0 {
return nil, xerrors.Errorf("no peers available")
}
}
// Try the request for each peer in the list,
// return on the first successful response.
// FIXME: Doing this serially isn't great, but fetching in parallel
// may not be a good idea either. Think about this more.
startTime := build.Clock.Now()
// FIXME: Should we track time per peer instead of a global one?
for _, peer := range peers {
select {
case <-ctx.Done():
return nil, xerrors.Errorf("context cancelled: %w", ctx.Err())
default:
}
// Send request, read response.
res, err := client.sendRequestToPeer(ctx, peer, req)
if err != nil {
if !xerrors.Is(err, inet.ErrNoConn) {
log.Warnf("could not connect to peer %s: %s",
peer.String(), err)
}
continue
}
// Process and validate response.
validRes, err := client.processResponse(req, res)
if err != nil {
log.Warnf("processing peer %s response failed: %s",
peer.String(), err)
continue
}
client.peerTracker.logGlobalSuccess(build.Clock.Since(startTime))
client.host.ConnManager().TagPeer(peer, "bsync", SUCCESS_PEER_TAG_VALUE)
return validRes, nil
}
errString := "doRequest failed for all peers"
if singlePeer != nil {
errString = "doRequest failed for single peer"
// (The peer has already been logged before, don't print it again.)
}
return nil, xerrors.Errorf(errString)
}
// Process and validate response. Check the status and that the information
// returned matches the request (and its integrity). Extract the information
// into a `ValidatedResponse` for the external-facing APIs to select what they
// want.
//
// We are conflating in the single error returned both status and validation
// errors. Peer penalization should happen here then, before returning, so
// we can apply the correct penalties depending on the cause of the error.
func (client *BlockSync) processResponse(
req *Request,
res *Response,
// FIXME: Add the `peer` as argument once we implement penalties.
) (*ValidatedResponse, error) {
err := res.statusToError()
if err != nil {
return nil, xerrors.Errorf("status error: %s", err)
}
options := parseOptions(req.Options)
if options.noOptionsSet() {
// Safety check, this shouldn't happen, and even if it did
// it should be caught by the peer in its error status.
return nil, xerrors.Errorf("nothing was requested")
}
// Verify that the chain segment returned is in the valid range.
// Note that the returned length might be less than requested.
resLength := len(res.Chain)
if resLength == 0 {
return nil, xerrors.Errorf("got no chain in successful response")
}
if resLength > int(req.Length) {
return nil, xerrors.Errorf("got longer response (%d) than requested (%d)",
resLength, req.Length)
}
if resLength < int(req.Length) && res.Status != Partial {
return nil, xerrors.Errorf("got less than requested without a proper status: %s", res.Status)
}
validRes := &ValidatedResponse{}
if options.IncludeHeaders {
// Check for valid block sets and extract them into `TipSet`s.
validRes.Tipsets = make([]*types.TipSet, resLength)
for i := 0; i < resLength; i++ {
validRes.Tipsets[i], err = types.NewTipSet(res.Chain[i].Blocks)
if err != nil {
return nil, xerrors.Errorf("invalid tipset blocks at height (head - %d): %w", i, err)
}
}
// Check that the returned head matches the one requested.
if !types.CidArrsEqual(validRes.Tipsets[0].Cids(), req.Head) {
return nil, xerrors.Errorf("returned chain head does not match request")
}
// Check `TipSet` are connected (valid chain).
for i := 0; i < len(validRes.Tipsets) - 1; i++ {
if validRes.Tipsets[i].IsChildOf(validRes.Tipsets[i+1]) == false {
return nil, fmt.Errorf("tipsets are not connected at height (head - %d)/(head - %d)",
i, i+1)
// FIXME: Maybe give more information here, like CIDs.
}
}
}
if options.IncludeMessages {
validRes.Messages = make([]*CompactedMessages, resLength)
for i := 0; i < resLength; i++ {
if res.Chain[i].Messages == nil {
return nil, xerrors.Errorf("no messages included for tipset at height (head - %d): %w", i)
}
validRes.Messages[i] = res.Chain[i].Messages
}
if options.IncludeHeaders {
// If the headers were also returned check that the compression
// indexes are valid before `toFullTipSets()` is called by the
// consumer.
for tipsetIdx := 0; tipsetIdx < resLength; tipsetIdx++ {
msgs := res.Chain[tipsetIdx].Messages
blocksNum := len(res.Chain[tipsetIdx].Blocks)
if len(msgs.BlsIncludes) != blocksNum {
return nil, xerrors.Errorf("BlsIncludes (%d) does not match number of blocks (%d)",
len(msgs.BlsIncludes), blocksNum)
}
if len(msgs.SecpkIncludes) != blocksNum {
return nil, xerrors.Errorf("SecpkIncludes (%d) does not match number of blocks (%d)",
len(msgs.SecpkIncludes), blocksNum)
}
for blockIdx := 0; blockIdx < blocksNum; blockIdx++ {
for _, mi := range msgs.BlsIncludes[blockIdx] {
if int(mi) >= len(msgs.Bls) {
return nil, xerrors.Errorf("index in BlsIncludes (%d) exceeds number of messages (%d)",
mi, len(msgs.Bls))
}
}
for _, mi := range msgs.SecpkIncludes[blockIdx] {
if int(mi) >= len(msgs.Secpk) {
return nil, xerrors.Errorf("index in SecpkIncludes (%d) exceeds number of messages (%d)",
mi, len(msgs.Secpk))
}
}
}
}
}
}
return validRes, nil
}
// GetBlocks fetches count blocks from the network, from the provided tipset
// *backwards*, returning as many tipsets as count.
//
// {hint/usage}: This is used by the Syncer during normal chain syncing and when
// resolving forks.
func (client *BlockSync) GetBlocks(
ctx context.Context,
tsk types.TipSetKey,
count int,
) ([]*types.TipSet, error) {
ctx, span := trace.StartSpan(ctx, "bsync.GetBlocks")
defer span.End()
if span.IsRecordingEvents() {
span.AddAttributes(
trace.StringAttribute("tipset", fmt.Sprint(tsk.Cids())),
trace.Int64Attribute("count", int64(count)),
)
}
req := &Request{
Head: tsk.Cids(),
Length: uint64(count),
Options: Headers,
}
validRes, err := client.doRequest(ctx, req, nil)
if err != nil {
return nil, err
}
return validRes.Tipsets, nil
}
func (client *BlockSync) GetFullTipSet(
ctx context.Context,
peer peer.ID,
tsk types.TipSetKey,
) (*store.FullTipSet, error) {
// TODO: round robin through these peers on error
req := &Request{
Head: tsk.Cids(),
Length: 1,
Options: Headers | Messages,
}
validRes, err := client.doRequest(ctx, req, &peer)
if err != nil {
return nil, err
}
return validRes.toFullTipSets()[0], nil
// If `doRequest` didn't fail we are guaranteed to have at least
// *one* tipset here, so it's safe to index directly.
}
func (client *BlockSync) GetChainMessages(
ctx context.Context,
head *types.TipSet,
length uint64,
) ([]*CompactedMessages, error) {
ctx, span := trace.StartSpan(ctx, "GetChainMessages")
defer span.End()
req := &Request{
Head: head.Cids(),
Length: length,
Options: Messages,
}
validRes, err := client.doRequest(ctx, req, nil)
if err != nil {
return nil, err
}
return validRes.Messages, nil
}
// Send a request to a peer. Write request in the stream and read the
// response back. We do not do any processing of the request/response
// here.
func (client *BlockSync) sendRequestToPeer(
ctx context.Context,
peer peer.ID,
req *Request,
) (_ *Response, err error) {
// Trace code.
ctx, span := trace.StartSpan(ctx, "sendRequestToPeer")
defer span.End()
if span.IsRecordingEvents() {
span.AddAttributes(
trace.StringAttribute("peer", peer.Pretty()),
)
}
defer func() {
if err != nil {
if span.IsRecordingEvents() {
span.SetStatus(trace.Status{
Code: 5,
Message: err.Error(),
})
}
}
}()
// -- TRACE --
supported, err := client.host.Peerstore().SupportsProtocols(peer, BlockSyncProtocolID)
if err != nil {
return nil, xerrors.Errorf("failed to get protocols for peer: %w", err)
}
if len(supported) == 0 || supported[0] != BlockSyncProtocolID {
return nil, xerrors.Errorf("peer %s does not support protocol %s",
peer, BlockSyncProtocolID)
// FIXME: `ProtoBook` should support a *single* protocol check that returns
// a bool instead of a list.
}
connectionStart := build.Clock.Now()
// Open stream to peer.
stream, err := client.host.NewStream(
inet.WithNoDial(ctx, "should already have connection"),
peer,
BlockSyncProtocolID)
if err != nil {
client.RemovePeer(peer)
return nil, xerrors.Errorf("failed to open stream to peer: %w", err)
}
// Write request.
_ = stream.SetWriteDeadline(time.Now().Add(WRITE_REQ_DEADLINE))
if err := cborutil.WriteCborRPC(stream, req); err != nil {
_ = stream.SetWriteDeadline(time.Time{})
// FIXME: What's the point of setting a blank deadline that won't time out?
// Is this our way of clearing the old one?
client.peerTracker.logFailure(peer, build.Clock.Since(connectionStart))
return nil, err
}
// FIXME: Same, why are we doing this again here?
_ = stream.SetWriteDeadline(time.Time{})
// Read response.
var res Response
err = cborutil.ReadCborRPC(
// FIXME: Extract constants.
bufio.NewReader(incrt.New(stream, READ_RES_MIN_SPEED, READ_RES_DEADLINE)),
&res)
if err != nil {
client.peerTracker.logFailure(peer, build.Clock.Since(connectionStart))
return nil, err
}
// FIXME: Move all this together at the top using a defer as done elsewhere.
// Maybe we need to declare `res` in the signature.
if span.IsRecordingEvents() {
span.AddAttributes(
trace.Int64Attribute("resp_status", int64(res.Status)),
trace.StringAttribute("msg", res.ErrorMessage),
trace.Int64Attribute("chain_len", int64(len(res.Chain))),
)
}
client.peerTracker.logSuccess(peer, build.Clock.Since(connectionStart))
return &res, nil
}
func (client *BlockSync) AddPeer(p peer.ID) {
client.peerTracker.addPeer(p)
}
func (client *BlockSync) RemovePeer(p peer.ID) {
client.peerTracker.removePeer(p)
}
// getShuffledPeers returns a preference-sorted set of peers (by latency
// and failure counting), shuffling the first few peers so we don't always
// pick the same peer.
// FIXME: Merge with the shuffle if we *always* do it.
func (client *BlockSync) getShuffledPeers() []peer.ID {
peers := client.peerTracker.prefSortedPeers()
shufflePrefix(peers)
return peers
}
func shufflePrefix(peers []peer.ID) {
prefix := SHUFFLE_PEERS_PREFIX
if len(peers) < prefix {
prefix = len(peers)
}
buf := make([]peer.ID, prefix)
perm := rand.Perm(prefix)
for i, v := range perm {
buf[i] = peers[v]
}
copy(peers, buf)
}

View File

@ -1,151 +0,0 @@
package blocksync
import (
"context"
"github.com/ipfs/go-cid"
"github.com/ipfs/go-datastore"
"github.com/ipfs/go-graphsync"
"github.com/ipld/go-ipld-prime"
"github.com/libp2p/go-libp2p-core/peer"
"golang.org/x/xerrors"
store "github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
ipldselector "github.com/ipld/go-ipld-prime/traversal/selector"
selectorbuilder "github.com/ipld/go-ipld-prime/traversal/selector/builder"
)
const (
// AMT selector recursion. An AMT has arity of 8 so this gives allows
// us to retrieve trees with 8^10 (1,073,741,824) elements.
amtRecursionDepth = uint32(10)
// some constants for looking up tuple encoded struct fields
// field index of Parents field in a block header
blockIndexParentsField = 5
// field index of Messages field in a block header
blockIndexMessagesField = 10
// field index of AMT node in AMT head
amtHeadNodeFieldIndex = 2
// field index of links array AMT node
amtNodeLinksFieldIndex = 1
// field index of values array AMT node
amtNodeValuesFieldIndex = 2
// maximum depth per traversal
maxRequestLength = 50
)
var amtSelector selectorbuilder.SelectorSpec
func init() {
// builer for selectors
ssb := selectorbuilder.NewSelectorSpecBuilder(basicnode.Style.Any)
// amt selector -- needed to selector through a messages AMT
amtSelector = ssb.ExploreIndex(amtHeadNodeFieldIndex,
ssb.ExploreRecursive(ipldselector.RecursionLimitDepth(int(amtRecursionDepth)),
ssb.ExploreUnion(
ssb.ExploreIndex(amtNodeLinksFieldIndex,
ssb.ExploreAll(ssb.ExploreRecursiveEdge())),
ssb.ExploreIndex(amtNodeValuesFieldIndex,
ssb.ExploreAll(ssb.Matcher())))))
}
func selectorForRequest(req *BlockSyncRequest) ipld.Node {
// builer for selectors
ssb := selectorbuilder.NewSelectorSpecBuilder(basicnode.Style.Any)
bso := ParseBSOptions(req.Options)
if bso.IncludeMessages {
return ssb.ExploreRecursive(ipldselector.RecursionLimitDepth(int(req.RequestLength)),
ssb.ExploreIndex(blockIndexParentsField,
ssb.ExploreUnion(
ssb.ExploreAll(
ssb.ExploreIndex(blockIndexMessagesField,
ssb.ExploreRange(0, 2, amtSelector),
)),
ssb.ExploreIndex(0, ssb.ExploreRecursiveEdge()),
))).Node()
}
return ssb.ExploreRecursive(ipldselector.RecursionLimitDepth(int(req.RequestLength)), ssb.ExploreIndex(blockIndexParentsField,
ssb.ExploreUnion(
ssb.ExploreAll(
ssb.Matcher(),
),
ssb.ExploreIndex(0, ssb.ExploreRecursiveEdge()),
))).Node()
}
func firstTipsetSelector(req *BlockSyncRequest) ipld.Node {
// builer for selectors
ssb := selectorbuilder.NewSelectorSpecBuilder(basicnode.Style.Any)
bso := ParseBSOptions(req.Options)
if bso.IncludeMessages {
return ssb.ExploreIndex(blockIndexMessagesField,
ssb.ExploreRange(0, 2, amtSelector),
).Node()
}
return ssb.Matcher().Node()
}
func (bs *BlockSync) executeGsyncSelector(ctx context.Context, p peer.ID, root cid.Cid, sel ipld.Node) error {
extension := graphsync.ExtensionData{
Name: "chainsync",
Data: nil,
}
_, errs := bs.gsync.Request(ctx, p, cidlink.Link{Cid: root}, sel, extension)
for err := range errs {
return xerrors.Errorf("failed to complete graphsync request: %w", err)
}
return nil
}
// Fallback for interacting with other non-lotus nodes
func (bs *BlockSync) fetchBlocksGraphSync(ctx context.Context, p peer.ID, req *BlockSyncRequest) (*BlockSyncResponse, error) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
immediateTsSelector := firstTipsetSelector(req)
// Do this because we can only request one root at a time
for _, r := range req.Start {
if err := bs.executeGsyncSelector(ctx, p, r, immediateTsSelector); err != nil {
return nil, err
}
}
if req.RequestLength > maxRequestLength {
req.RequestLength = maxRequestLength
}
sel := selectorForRequest(req)
// execute the selector forreal
if err := bs.executeGsyncSelector(ctx, p, req.Start[0], sel); err != nil {
return nil, err
}
// Now pull the data we fetched out of the chainstore (where it should now be persisted)
tempcs := store.NewChainStore(bs.bserv.Blockstore(), datastore.NewMapDatastore(), nil)
opts := ParseBSOptions(req.Options)
tsk := types.NewTipSetKey(req.Start...)
chain, err := collectChainSegment(tempcs, tsk, req.RequestLength, opts)
if err != nil {
return nil, xerrors.Errorf("failed to load chain data from chainstore after successful graphsync response (start = %v): %w", req.Start, err)
}
return &BlockSyncResponse{Chain: chain}, nil
}

View File

@ -0,0 +1,169 @@
package blocksync
// FIXME: This needs to be reviewed.
import (
"sort"
"sync"
"time"
"github.com/libp2p/go-libp2p-core/peer"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/lib/peermgr"
)
type peerStats struct {
successes int
failures int
firstSeen time.Time
averageTime time.Duration
}
type bsPeerTracker struct {
lk sync.Mutex
peers map[peer.ID]*peerStats
avgGlobalTime time.Duration
pmgr *peermgr.PeerMgr
}
func newPeerTracker(pmgr *peermgr.PeerMgr) *bsPeerTracker {
return &bsPeerTracker{
peers: make(map[peer.ID]*peerStats),
pmgr: pmgr,
}
}
func (bpt *bsPeerTracker) addPeer(p peer.ID) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
if _, ok := bpt.peers[p]; ok {
return
}
bpt.peers[p] = &peerStats{
firstSeen: build.Clock.Now(),
}
}
const (
// newPeerMul is how much better than average is the new peer assumed to be
// less than one to encourouge trying new peers
newPeerMul = 0.9
)
func (bpt *bsPeerTracker) prefSortedPeers() []peer.ID {
// TODO: this could probably be cached, but as long as its not too many peers, fine for now
bpt.lk.Lock()
defer bpt.lk.Unlock()
out := make([]peer.ID, 0, len(bpt.peers))
for p := range bpt.peers {
out = append(out, p)
}
// sort by 'expected cost' of requesting data from that peer
// additionally handle edge cases where not enough data is available
sort.Slice(out, func(i, j int) bool {
pi := bpt.peers[out[i]]
pj := bpt.peers[out[j]]
var costI, costJ float64
getPeerInitLat := func(p peer.ID) float64 {
var res float64
if bpt.pmgr != nil {
if lat, ok := bpt.pmgr.GetPeerLatency(p); ok {
res = float64(lat)
}
}
if res == 0 {
res = float64(bpt.avgGlobalTime)
}
return res * newPeerMul
}
if pi.successes+pi.failures > 0 {
failRateI := float64(pi.failures) / float64(pi.failures+pi.successes)
costI = float64(pi.averageTime) + failRateI*float64(bpt.avgGlobalTime)
} else {
costI = getPeerInitLat(out[i])
}
if pj.successes+pj.failures > 0 {
failRateJ := float64(pj.failures) / float64(pj.failures+pj.successes)
costJ = float64(pj.averageTime) + failRateJ*float64(bpt.avgGlobalTime)
} else {
costJ = getPeerInitLat(out[j])
}
return costI < costJ
})
return out
}
const (
// xInvAlpha = (N+1)/2
localInvAlpha = 5 // 86% of the value is the last 9
globalInvAlpha = 20 // 86% of the value is the last 39
)
func (bpt *bsPeerTracker) logGlobalSuccess(dur time.Duration) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
if bpt.avgGlobalTime == 0 {
bpt.avgGlobalTime = dur
return
}
delta := (dur - bpt.avgGlobalTime) / globalInvAlpha
bpt.avgGlobalTime += delta
}
func logTime(pi *peerStats, dur time.Duration) {
if pi.averageTime == 0 {
pi.averageTime = dur
return
}
delta := (dur - pi.averageTime) / localInvAlpha
pi.averageTime += delta
}
func (bpt *bsPeerTracker) logSuccess(p peer.ID, dur time.Duration) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
var pi *peerStats
var ok bool
if pi, ok = bpt.peers[p]; !ok {
log.Warnw("log success called on peer not in tracker", "peerid", p.String())
return
}
pi.successes++
logTime(pi, dur)
}
func (bpt *bsPeerTracker) logFailure(p peer.ID, dur time.Duration) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
var pi *peerStats
var ok bool
if pi, ok = bpt.peers[p]; !ok {
log.Warn("log failure called on peer not in tracker", "peerid", p.String())
return
}
pi.failures++
logTime(pi, dur)
}
func (bpt *bsPeerTracker) removePeer(p peer.ID) {
bpt.lk.Lock()
defer bpt.lk.Unlock()
delete(bpt.peers, p)
}

177
chain/blocksync/protocol.go Normal file
View File

@ -0,0 +1,177 @@
package blocksync
import (
"github.com/filecoin-project/lotus/chain/store"
"time"
"github.com/ipfs/go-cid"
logging "github.com/ipfs/go-log"
"golang.org/x/xerrors"
"github.com/filecoin-project/lotus/chain/types"
)
var log = logging.Logger("blocksync")
const BlockSyncProtocolID = "/fil/sync/blk/0.0.1"
const MaxRequestLength = 800
// Extracted constants from the code.
// FIXME: Should be reviewed and confirmed.
const SUCCESS_PEER_TAG_VALUE = 25
const WRITE_REQ_DEADLINE = 5 * time.Second
const READ_RES_DEADLINE = WRITE_REQ_DEADLINE
const READ_RES_MIN_SPEED = 50<<10
const SHUFFLE_PEERS_PREFIX = 5
const WRITE_RES_DEADLINE = 60 * time.Second
// FIXME: Rename. Make private.
type Request struct {
// List of ordered CIDs comprising a `TipSetKey` from where to start
// fetching backwards.
// FIXME: Why don't we send a `TipSetKey` instead of converting back
// and forth?
Head []cid.Cid
// Number of block sets to fetch from `Head` (inclusive, should always
// be in the range `[1, MaxRequestLength]`).
Length uint64
// Request options, see `Options` type for more details. Compressed
// in a single `uint64` to save space.
Options uint64
}
// `Request` processed and validated to query the tipsets needed.
type validatedRequest struct {
head types.TipSetKey
length uint64
options *parsedOptions
}
// Request options. When fetching the chain segment we can fetch
// either block headers, messages, or both.
const (
Headers = 1 << iota
Messages
)
// Decompressed options into separate struct members for easy access
// during internal processing..
type parsedOptions struct {
IncludeHeaders bool
IncludeMessages bool
}
func (options *parsedOptions) noOptionsSet() bool {
return options.IncludeHeaders == false &&
options.IncludeMessages == false
}
func parseOptions(optfield uint64) *parsedOptions {
return &parsedOptions{
IncludeHeaders: optfield&(uint64(Headers)) != 0,
IncludeMessages: optfield&(uint64(Messages)) != 0,
}
}
// FIXME: Rename. Make private.
type Response struct {
Status status
// String that complements the error status when converting to an
// internal error (see `statusToError()`).
ErrorMessage string
Chain []*BSTipSet
}
type status uint64
const (
Ok status = 0
// We could not fetch all blocks requested (but at least we returned
// the `Head` requested). Not considered an error.
Partial = 101
// Errors
NotFound = 201
GoAway = 202
InternalError = 203
BadRequest = 204
)
// Convert status to internal error.
func (res *Response) statusToError() error {
switch res.Status {
case Ok, Partial:
return nil
// FIXME: Consider if we want to not process `Partial` responses
// and return an error instead.
case NotFound:
return xerrors.Errorf("not found")
case GoAway:
return xerrors.Errorf("not handling 'go away' blocksync responses yet")
case InternalError:
return xerrors.Errorf("block sync peer errored: %s", res.ErrorMessage)
case BadRequest:
return xerrors.Errorf("block sync request invalid: %s", res.ErrorMessage)
default:
return xerrors.Errorf("unrecognized response code: %d", res.Status)
}
}
// FIXME: Rename.
type BSTipSet struct {
Blocks []*types.BlockHeader
Messages *CompactedMessages
}
// FIXME: Describe format. The `Includes` seem to index
// from block to message.
// FIXME: The logic of this function should belong to it, not
// to the consumer.
type CompactedMessages struct {
Bls []*types.Message
BlsIncludes [][]uint64
Secpk []*types.SignedMessage
SecpkIncludes [][]uint64
}
// Response that has been validated according to the protocol
// and can be safely accessed.
// FIXME: Maybe rename to verified, keep consistent naming.
type ValidatedResponse struct {
Tipsets []*types.TipSet
Messages []*CompactedMessages
}
// Decompress messages and form full tipsets with them. The headers
// need to have been requested as well.
func (res *ValidatedResponse) toFullTipSets() ([]*store.FullTipSet) {
if len(res.Tipsets) == 0 {
// This decompression can only be done if both headers and
// messages are returned in the response.
// FIXME: Do we need to check the messages are present also? The validation
// would seem to imply this is unnecessary, can be added just in case.
return nil
}
ftsList := make([]*store.FullTipSet, len(res.Tipsets))
for tipsetIdx := range res.Tipsets {
fts := &store.FullTipSet{} // FIXME: We should use the `NewFullTipSet` API.
msgs := res.Messages[tipsetIdx]
for blockIdx, b := range res.Tipsets[tipsetIdx].Blocks() {
fb := &types.FullBlock{
Header: b,
}
for _, mi := range msgs.BlsIncludes[blockIdx] {
fb.BlsMessages = append(fb.BlsMessages, msgs.Bls[mi])
}
for _, mi := range msgs.SecpkIncludes[blockIdx] {
fb.SecpkMessages = append(fb.SecpkMessages, msgs.Secpk[mi])
}
fts.Blocks = append(fts.Blocks, fb)
}
ftsList[tipsetIdx] = fts
}
return ftsList
}

263
chain/blocksync/server.go Normal file
View File

@ -0,0 +1,263 @@
package blocksync
import (
"bufio"
"context"
"fmt"
"time"
"go.opencensus.io/trace"
"golang.org/x/xerrors"
cborutil "github.com/filecoin-project/go-cbor-util"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
"github.com/ipfs/go-cid"
inet "github.com/libp2p/go-libp2p-core/network"
)
// BlockSyncService is the component that services BlockSync requests from
// peers.
//
// BlockSync is the basic chain synchronization protocol of Filecoin. BlockSync
// is an RPC-oriented protocol, with a single operation to request blocks.
//
// A request contains a start anchor block (referred to with a CID), and a
// amount of blocks requested beyond the anchor (including the anchor itself).
//
// A client can also pass options, encoded as a 64-bit bitfield. Lotus supports
// two options at the moment:
//
// - include block contents
// - include block messages
//
// The response will include a status code, an optional message, and the
// response payload in case of success. The payload is a slice of serialized
// tipsets.
// FIXME: Rename to just `Server` (will be done later, see note on `BlockSync`).
type BlockSyncService struct {
cs *store.ChainStore
}
func NewBlockSyncService(cs *store.ChainStore) *BlockSyncService {
return &BlockSyncService{
cs: cs,
}
}
// Entry point of the service, handles `Request`s.
func (server *BlockSyncService) HandleStream(stream inet.Stream) {
ctx, span := trace.StartSpan(context.Background(), "blocksync.HandleStream")
defer span.End()
defer stream.Close() //nolint:errcheck
var req Request
if err := cborutil.ReadCborRPC(bufio.NewReader(stream), &req); err != nil {
log.Warnf("failed to read block sync request: %s", err)
return
}
log.Infow("block sync request",
"start", req.Head, "len", req.Length)
resp, err := server.processRequest(ctx, &req)
if err != nil {
log.Warn("failed to process request: ", err)
return
}
_ = stream.SetDeadline(time.Now().Add(WRITE_RES_DEADLINE))
if err := cborutil.WriteCborRPC(stream, resp); err != nil {
log.Warnw("failed to write back response for handle stream",
"err", err, "peer", stream.Conn().RemotePeer())
return
}
}
// Validate and service the request. We return either a protocol
// response or an internal error. The protocol response may signal
// a protocol error itself (e.g., invalid request).
func (server *BlockSyncService) processRequest(
ctx context.Context,
req *Request,
) (*Response, error) {
validReq, errResponse := validateRequest(ctx, req)
if errResponse != nil {
// The request did not pass validation, return the response
// indicating it.
return errResponse, nil
}
return server.serviceRequest(ctx, validReq)
}
// Validate request. We either return a `validatedRequest`, or an error
// `Response` indicating why we can't process it. We do not return any
// internal errors here, we just signal protocol ones.
func validateRequest(
ctx context.Context,
req *Request,
) ( *validatedRequest, *Response) {
_, span := trace.StartSpan(ctx, "blocksync.ValidateRequest")
defer span.End()
validReq := validatedRequest{}
validReq.options = parseOptions(req.Options)
if validReq.options.noOptionsSet() {
return nil, &Response{
Status: BadRequest,
ErrorMessage: "no options set",
}
}
validReq.length = req.Length
if validReq.length > MaxRequestLength {
return nil, &Response{
Status: BadRequest,
ErrorMessage: fmt.Sprintf("request length over maximum allowed (%d)",
MaxRequestLength),
}
}
if validReq.length == 0 {
return nil, &Response{
Status: BadRequest,
ErrorMessage: "invalid request length of zero",
}
}
if len(req.Head) == 0 {
return nil, &Response{
Status: BadRequest,
ErrorMessage: "no cids in request",
}
}
validReq.head = types.NewTipSetKey(req.Head...)
// FIXME: Add as a defer at the start.
span.AddAttributes(
trace.BoolAttribute("blocks", validReq.options.IncludeHeaders),
trace.BoolAttribute("messages", validReq.options.IncludeMessages),
trace.Int64Attribute("reqlen", int64(validReq.length)),
)
return &validReq, nil
}
func (server *BlockSyncService) serviceRequest(
ctx context.Context,
req *validatedRequest,
) (*Response, error) {
_, span := trace.StartSpan(ctx, "blocksync.ServiceRequest")
defer span.End()
chain, err := collectChainSegment(server.cs, req)
if err != nil {
log.Warn("block sync request: collectChainSegment failed: ", err)
return &Response{
Status: InternalError,
ErrorMessage: err.Error(),
}, nil
}
status := Ok
if len(chain) < int(req.length) {
status = Partial
}
return &Response{
Chain: chain,
Status: status,
}, nil
}
func collectChainSegment(
cs *store.ChainStore,
req *validatedRequest,
) ([]*BSTipSet, error) {
var bstips []*BSTipSet
cur := req.head
for {
var bst BSTipSet
ts, err := cs.LoadTipSet(cur)
if err != nil {
return nil, xerrors.Errorf("failed loading tipset %s: %w", cur, err)
}
if req.options.IncludeHeaders {
bst.Blocks = ts.Blocks()
}
if req.options.IncludeMessages {
bmsgs, bmincl, smsgs, smincl, err := gatherMessages(cs, ts)
if err != nil {
return nil, xerrors.Errorf("gather messages failed: %w", err)
}
// FIXME: Pass the response to the `gatherMessages` and set all this there.
bst.Messages = &CompactedMessages{}
bst.Messages.Bls = bmsgs
bst.Messages.BlsIncludes = bmincl
bst.Messages.Secpk = smsgs
bst.Messages.SecpkIncludes = smincl
}
bstips = append(bstips, &bst)
// If we collected the length requested or if we reached the
// start (genesis), then stop.
if uint64(len(bstips)) >= req.length || ts.Height() == 0 {
return bstips, nil
}
cur = ts.Parents()
}
}
func gatherMessages(cs *store.ChainStore, ts *types.TipSet) ([]*types.Message, [][]uint64, []*types.SignedMessage, [][]uint64, error) {
blsmsgmap := make(map[cid.Cid]uint64)
secpkmsgmap := make(map[cid.Cid]uint64)
var secpkmsgs []*types.SignedMessage
var blsmsgs []*types.Message
var secpkincl, blsincl [][]uint64
for _, block := range ts.Blocks() {
bmsgs, smsgs, err := cs.MessagesForBlock(block)
if err != nil {
return nil, nil, nil, nil, err
}
// FIXME: DRY. Use `chain.Message` interface.
bmi := make([]uint64, 0, len(bmsgs))
for _, m := range bmsgs {
i, ok := blsmsgmap[m.Cid()]
if !ok {
i = uint64(len(blsmsgs))
blsmsgs = append(blsmsgs, m)
blsmsgmap[m.Cid()] = i
}
bmi = append(bmi, i)
}
blsincl = append(blsincl, bmi)
smi := make([]uint64, 0, len(smsgs))
for _, m := range smsgs {
i, ok := secpkmsgmap[m.Cid()]
if !ok {
i = uint64(len(secpkmsgs))
secpkmsgs = append(secpkmsgs, m)
secpkmsgmap[m.Cid()] = i
}
smi = append(smi, i)
}
secpkincl = append(secpkincl, smi)
}
return blsmsgs, blsincl, secpkmsgs, secpkincl, nil
}

View File

@ -10,6 +10,8 @@ import (
"golang.org/x/xerrors"
address "github.com/filecoin-project/go-address"
blocks "github.com/ipfs/go-block-format"
bserv "github.com/ipfs/go-blockservice"
miner "github.com/filecoin-project/specs-actors/actors/builtin/miner"
"github.com/filecoin-project/specs-actors/actors/util/adt"
lru "github.com/hashicorp/golang-lru"
@ -38,7 +40,7 @@ import (
var log = logging.Logger("sub")
func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *chain.Syncer, cmgr connmgr.ConnManager) {
func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *chain.Syncer, bserv bserv.BlockService, cmgr connmgr.ConnManager) {
for {
msg, err := bsub.Next(ctx)
if err != nil {
@ -61,13 +63,13 @@ func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *cha
go func() {
start := build.Clock.Now()
log.Debug("about to fetch messages for block from pubsub")
bmsgs, err := s.Bsync.FetchMessagesByCids(context.TODO(), blk.BlsMessages)
bmsgs, err := FetchMessagesByCids(context.TODO(), bserv, blk.BlsMessages)
if err != nil {
log.Errorf("failed to fetch all bls messages for block received over pubusb: %s; source: %s", err, src)
return
}
smsgs, err := s.Bsync.FetchSignedMessagesByCids(context.TODO(), blk.SecpkMessages)
smsgs, err := FetchSignedMessagesByCids(context.TODO(), bserv, blk.SecpkMessages)
if err != nil {
log.Errorf("failed to fetch all secpk messages for block received over pubusb: %s; source: %s", err, src)
return
@ -90,6 +92,108 @@ func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *cha
}
}
func FetchMessagesByCids(
ctx context.Context,
bserv bserv.BlockService,
cids []cid.Cid,
) ([]*types.Message, error) {
out := make([]*types.Message, len(cids))
err := fetchCids(ctx, bserv, cids, func(i int, b blocks.Block) error {
msg, err := types.DecodeMessage(b.RawData())
if err != nil {
return err
}
// FIXME: We already sort in `fetchCids`, we are duplicating too much work,
// we don't need to pass the index.
if out[i] != nil {
return fmt.Errorf("received duplicate message")
}
out[i] = msg
return nil
})
if err != nil {
return nil, err
}
return out, nil
}
// FIXME: Duplicate of above.
func FetchSignedMessagesByCids(
ctx context.Context,
bserv bserv.BlockService,
cids []cid.Cid,
) ([]*types.SignedMessage, error) {
out := make([]*types.SignedMessage, len(cids))
err := fetchCids(ctx, bserv, cids, func(i int, b blocks.Block) error {
smsg, err := types.DecodeSignedMessage(b.RawData())
if err != nil {
return err
}
if out[i] != nil {
return fmt.Errorf("received duplicate message")
}
out[i] = smsg
return nil
})
if err != nil {
return nil, err
}
return out, nil
}
// Fetch `cids` from the block service, apply `cb` on each of them. Used
// by the fetch message functions above.
// We check that each block is received only once and we do not received
// blocks we did not request.
func fetchCids(
ctx context.Context,
bserv bserv.BlockService,
cids []cid.Cid,
cb func(int, blocks.Block) error,
) error {
// FIXME: Why don't we use the context here?
fetchedBlocks := bserv.GetBlocks(context.TODO(), cids)
cidIndex := make(map[cid.Cid]int)
for i, c := range cids {
cidIndex[c] = i
}
for i := 0; i < len(cids); i++ {
select {
case block, ok := <-fetchedBlocks:
if !ok {
// Closed channel, no more blocks fetched, check if we have all
// of the CIDs requested.
// FIXME: Review this check. We don't call the callback on the
// last index?
if i == len(cids)-1 {
break
}
return fmt.Errorf("failed to fetch all messages")
}
ix, ok := cidIndex[block.Cid()]
if !ok {
return fmt.Errorf("received message we didnt ask for")
}
if err := cb(ix, block); err != nil {
return err
}
}
}
return nil
}
type BlockValidator struct {
peers *lru.TwoQueueCache

View File

@ -1384,7 +1384,7 @@ func (syncer *Syncer) iterFullTipsets(ctx context.Context, headers []*types.TipS
nextI := (i + 1) - batchSize // want to fetch batchSize values, 'i' points to last one we want to fetch, so its 'inclusive' of our request, thus we need to add one to our request start index
var bstout []*blocksync.BSTipSet
var bstout []*blocksync.CompactedMessages
for len(bstout) < batchSize {
next := headers[nextI]
@ -1405,10 +1405,10 @@ func (syncer *Syncer) iterFullTipsets(ctx context.Context, headers []*types.TipS
this := headers[i-bsi]
bstip := bstout[len(bstout)-(bsi+1)]
fts, err := zipTipSetAndMessages(blks, this, bstip.BlsMessages, bstip.SecpkMessages, bstip.BlsMsgIncludes, bstip.SecpkMsgIncludes)
fts, err := zipTipSetAndMessages(blks, this, bstip.Bls, bstip.Secpk, bstip.BlsIncludes, bstip.SecpkIncludes)
if err != nil {
log.Warnw("zipping failed", "error", err, "bsi", bsi, "i", i,
"height", this.Height(), "bstip-height", bstip.Blocks[0].Height,
"height", this.Height(),
"next-height", i+batchSize)
return xerrors.Errorf("message processing failed: %w", err)
}
@ -1431,15 +1431,15 @@ func (syncer *Syncer) iterFullTipsets(ctx context.Context, headers []*types.TipS
return nil
}
func persistMessages(bs bstore.Blockstore, bst *blocksync.BSTipSet) error {
for _, m := range bst.BlsMessages {
func persistMessages(bs bstore.Blockstore, bst *blocksync.CompactedMessages) error {
for _, m := range bst.Bls {
//log.Infof("putting BLS message: %s", m.Cid())
if _, err := store.PutMessage(bs, m); err != nil {
log.Errorf("failed to persist messages: %+v", err)
return xerrors.Errorf("BLS message processing failed: %w", err)
}
}
for _, m := range bst.SecpkMessages {
for _, m := range bst.Secpk {
if m.Signature.Type != crypto.SigTypeSecp256k1 {
return xerrors.Errorf("unknown signature type on message %s: %q", m.Cid(), m.Signature.Type)
}

View File

@ -224,3 +224,7 @@ func (ts *TipSet) Contains(oc cid.Cid) bool {
}
return false
}
func (ts *TipSet) IsChildOf(parent *TipSet) bool {
return CidArrsEqual(ts.Parents().Cids(), parent.Cids())
}

View File

@ -63,8 +63,9 @@ func main() {
}
err = gen.WriteTupleEncodersToFile("./chain/blocksync/cbor_gen.go", "blocksync",
blocksync.BlockSyncRequest{},
blocksync.BlockSyncResponse{},
blocksync.Request{},
blocksync.Response{},
blocksync.CompactedMessages{},
blocksync.BSTipSet{},
)
if err != nil {

View File

@ -239,7 +239,7 @@ func Online() Option {
// Filecoin services
Override(new(*chain.Syncer), modules.NewSyncer),
Override(new(*blocksync.BlockSync), blocksync.NewBlockSyncClient),
Override(new(*blocksync.BlockSync), blocksync.NewClient),
Override(new(*messagepool.MessagePool), modules.MessagePool),
Override(new(modules.Genesis), modules.ErrorGenesis),

View File

@ -73,7 +73,7 @@ func RunBlockSync(h host.Host, svc *blocksync.BlockSyncService) {
h.SetStreamHandler(blocksync.BlockSyncProtocolID, svc.HandleStream)
}
func HandleIncomingBlocks(mctx helpers.MetricsCtx, lc fx.Lifecycle, ps *pubsub.PubSub, s *chain.Syncer, chain *store.ChainStore, stmgr *stmgr.StateManager, h host.Host, nn dtypes.NetworkName) {
func HandleIncomingBlocks(mctx helpers.MetricsCtx, lc fx.Lifecycle, ps *pubsub.PubSub, s *chain.Syncer, bserv dtypes.ChainBlockService, chain *store.ChainStore, stmgr *stmgr.StateManager, h host.Host, nn dtypes.NetworkName) {
ctx := helpers.LifecycleCtx(mctx, lc)
blocksub, err := ps.Subscribe(build.BlocksTopic(nn))
@ -92,7 +92,7 @@ func HandleIncomingBlocks(mctx helpers.MetricsCtx, lc fx.Lifecycle, ps *pubsub.P
panic(err)
}
go sub.HandleIncomingBlocks(ctx, blocksub, s, h.ConnManager())
go sub.HandleIncomingBlocks(ctx, blocksub, s, bserv, h.ConnManager())
}
func HandleIncomingMessages(mctx helpers.MetricsCtx, lc fx.Lifecycle, ps *pubsub.PubSub, mpool *messagepool.MessagePool, nn dtypes.NetworkName) {