lotus/chain/sync_manager.go

459 lines
9.8 KiB
Go
Raw Permalink Normal View History

package chain
import (
"context"
2019-11-15 02:27:43 +00:00
"sort"
"sync"
"github.com/filecoin-project/lotus/chain/types"
2019-12-05 01:18:30 +00:00
peer "github.com/libp2p/go-libp2p-core/peer"
)
// BootstrapPeerThreshold is the number of peers that should report heads
// before bootstrap sync is considered viable.
// NOTE(review): NewSyncManager hardcodes bspThresh to 1 rather than using
// this constant — confirm which value is intended.
const BootstrapPeerThreshold = 2

// Bootstrap state machine: Init -> Selected -> Scheduled -> Complete.
const (
	// BSStateInit: waiting for enough peer heads to pick a bootstrap target.
	BSStateInit = 0
	// BSStateSelected: a bootstrap sync target has been chosen.
	BSStateSelected = 1
	// BSStateScheduled: the bootstrap target has been handed to a worker.
	BSStateScheduled = 2
	// BSStateComplete: a sync has succeeded; bootstrap is finished.
	BSStateComplete = 3
)
// SyncFunc is the callback invoked to actually sync a candidate tipset;
// normally it is set to `(*Syncer).Sync()` (see the doSync field).
type SyncFunc func(context.Context, *types.TipSet) error

// SyncManager manages the chain synchronization process, both at bootstrap time
// and during ongoing operation.
//
// It receives candidate chain heads in the form of tipsets from peers,
// and schedules them onto sync workers, deduplicating processing for
// already-active syncs.
type SyncManager interface {
	// Start starts the SyncManager.
	Start()
	// Stop stops the SyncManager.
	Stop()
	// SetPeerHead informs the SyncManager that the supplied peer reported the
	// supplied tipset.
	SetPeerHead(ctx context.Context, p peer.ID, ts *types.TipSet)
	// State retrieves the state of the sync workers.
	State() []SyncerState
}
// syncManager is the concrete SyncManager implementation. A single scheduler
// goroutine owns the "Sync Scheduler fields" below; workers communicate with
// it only through channels.
type syncManager struct {
	lk        sync.Mutex                // guards peerHeads
	peerHeads map[peer.ID]*types.TipSet // last head reported by each peer

	bssLk          sync.Mutex // guards bootstrapState
	bootstrapState int        // one of the BSState* constants
	bspThresh      int        // peer count required before bootstrap begins

	// incomingTipSets receives candidate heads from SetPeerHead.
	incomingTipSets chan *types.TipSet
	// syncTargets delivers scheduled tipsets to the sync workers.
	syncTargets chan *types.TipSet
	// syncResults carries worker outcomes back to the scheduler.
	syncResults chan *syncResult

	// syncStates holds per-worker state; written by workers, read by State().
	// NOTE(review): access is not synchronized — confirm this is race-free.
	syncStates []*SyncerState

	// Normally this handler is set to `(*Syncer).Sync()`.
	doSync func(context.Context, *types.TipSet) error

	stop chan struct{} // closed by Stop() to shut down the scheduler

	// Sync Scheduler fields (touched only by the syncScheduler goroutine)
	activeSyncs    map[types.TipSetKey]*types.TipSet // tipsets currently being synced by workers
	syncQueue      syncBucketSet                     // pending sync targets, bucketed by chain
	activeSyncTips syncBucketSet                     // tipsets related to an in-flight sync
	nextSyncTarget *syncTargetBucket                 // bucket to hand to the next free worker
	workerChan     chan *types.TipSet                // nil, or syncTargets when a target is ready
}
2020-09-14 20:58:59 +00:00
// Compile-time check that syncManager implements SyncManager.
var _ SyncManager = (*syncManager)(nil)

// syncResult reports the outcome of one worker's sync attempt.
type syncResult struct {
	ts      *types.TipSet // the tipset that was synced
	success bool          // true iff doSync returned no error
}

// syncWorkerCount is the number of concurrent sync worker goroutines.
const syncWorkerCount = 3
2020-09-14 20:58:59 +00:00
func NewSyncManager(sync SyncFunc) SyncManager {
return &syncManager{
2019-11-15 21:35:29 +00:00
bspThresh: 1,
peerHeads: make(map[peer.ID]*types.TipSet),
syncTargets: make(chan *types.TipSet),
syncResults: make(chan *syncResult),
2019-11-16 01:05:16 +00:00
syncStates: make([]*SyncerState, syncWorkerCount),
2019-11-15 21:35:29 +00:00
incomingTipSets: make(chan *types.TipSet),
activeSyncs: make(map[types.TipSetKey]*types.TipSet),
doSync: sync,
stop: make(chan struct{}),
}
}
2020-09-14 20:58:59 +00:00
// Start launches the scheduler goroutine and syncWorkerCount worker
// goroutines. It does not block.
func (sm *syncManager) Start() {
	go sm.syncScheduler()
	for i := 0; i < syncWorkerCount; i++ {
		go sm.syncWorker(i)
	}
}
2020-09-14 20:58:59 +00:00
// Stop signals shutdown by closing the stop channel; the scheduler goroutine
// exits on this signal.
// NOTE(review): syncWorker only returns when syncTargets is closed, which
// never happens here — the workers appear to outlive Stop; confirm whether
// that is intended.
func (sm *syncManager) Stop() {
	close(sm.stop)
}
2020-09-14 20:58:59 +00:00
// SetPeerHead records the head tipset reported by peer p and feeds it to the
// scheduler. While bootstrapping (BSStateInit) it instead waits until at
// least bspThresh peers have reported heads above genesis, then selects the
// heaviest observed head as the bootstrap target and forwards that.
func (sm *syncManager) SetPeerHead(ctx context.Context, p peer.ID, ts *types.TipSet) {
	sm.lk.Lock()
	defer sm.lk.Unlock()
	sm.peerHeads[p] = ts

	if sm.getBootstrapState() == BSStateInit {
		spc := sm.syncedPeerCount()
		if spc >= sm.bspThresh {
			// Its go time!
			target, err := sm.selectSyncTarget()
			if err != nil {
				log.Error("failed to select sync target: ", err)
				return
			}
			sm.setBootstrapState(BSStateSelected)

			// NOTE(review): this send blocks (while holding sm.lk) until the
			// scheduler receives — confirm there is no lock-ordering hazard.
			sm.incomingTipSets <- target
		}
		log.Infof("sync bootstrap has %d peers", spc)
		return
	}

	sm.incomingTipSets <- ts
}
2020-09-14 20:58:59 +00:00
// State returns a snapshot of every worker's SyncerState.
// NOTE(review): syncStates entries are written by the syncWorker goroutines
// without synchronization, and an entry is nil until its worker has started —
// confirm Snapshot tolerates a nil receiver and that this read is race-free.
func (sm *syncManager) State() []SyncerState {
	ret := make([]SyncerState, 0, len(sm.syncStates))
	for _, s := range sm.syncStates {
		ret = append(ret, s.Snapshot())
	}
	return ret
}
2019-11-15 02:27:43 +00:00
// syncBucketSet groups candidate tipsets into buckets, one bucket per
// distinct chain (see syncTargetBucket.sameChainAs).
type syncBucketSet struct {
	buckets []*syncTargetBucket
}
2019-12-05 01:18:30 +00:00
// newSyncTargetBucket creates a bucket pre-populated with the given tipsets.
func newSyncTargetBucket(tipsets ...*types.TipSet) *syncTargetBucket {
	stb := new(syncTargetBucket)
	for _, t := range tipsets {
		stb.add(t)
	}
	return stb
}
// RelatedToAny reports whether ts belongs to the same chain as any bucket in
// the set.
func (sbs *syncBucketSet) RelatedToAny(ts *types.TipSet) bool {
	for _, bucket := range sbs.buckets {
		if !bucket.sameChainAs(ts) {
			continue
		}
		return true
	}
	return false
}
2019-11-15 02:27:43 +00:00
func (sbs *syncBucketSet) Insert(ts *types.TipSet) {
for _, b := range sbs.buckets {
if b.sameChainAs(ts) {
b.add(ts)
return
}
}
2019-12-05 01:18:30 +00:00
sbs.buckets = append(sbs.buckets, newSyncTargetBucket(ts))
2019-11-15 02:27:43 +00:00
}
func (sbs *syncBucketSet) Pop() *syncTargetBucket {
var bestBuck *syncTargetBucket
var bestTs *types.TipSet
for _, b := range sbs.buckets {
hts := b.heaviestTipSet()
if bestBuck == nil || bestTs.ParentWeight().LessThan(hts.ParentWeight()) {
bestBuck = b
bestTs = hts
}
}
2019-11-15 21:35:29 +00:00
sbs.removeBucket(bestBuck)
2019-11-15 02:27:43 +00:00
return bestBuck
}
2019-11-15 21:35:29 +00:00
// removeBucket deletes toremove from the set (compared by pointer identity);
// it is a no-op when the bucket is not present.
func (sbs *syncBucketSet) removeBucket(toremove *syncTargetBucket) {
	// Capacity len(buckets) rather than len-1: the original's len-1 panics
	// (negative capacity) when called on an empty set, and undershoots when
	// toremove is absent.
	kept := make([]*syncTargetBucket, 0, len(sbs.buckets))
	for _, b := range sbs.buckets {
		if b != toremove {
			kept = append(kept, b)
		}
	}
	sbs.buckets = kept
}
// PopRelated removes and returns the bucket on the same chain as ts, or nil
// when no bucket is related.
func (sbs *syncBucketSet) PopRelated(ts *types.TipSet) *syncTargetBucket {
	for _, bucket := range sbs.buckets {
		if !bucket.sameChainAs(ts) {
			continue
		}
		sbs.removeBucket(bucket)
		return bucket
	}
	return nil
}
2019-11-15 02:27:43 +00:00
func (sbs *syncBucketSet) Heaviest() *types.TipSet {
// TODO: should also consider factoring in number of peers represented by each bucket here
var bestTs *types.TipSet
2019-11-15 21:35:29 +00:00
for _, b := range sbs.buckets {
2019-11-15 02:27:43 +00:00
bhts := b.heaviestTipSet()
if bestTs == nil || bhts.ParentWeight().GreaterThan(bestTs.ParentWeight()) {
bestTs = bhts
}
}
return bestTs
}
2019-12-08 11:44:47 +00:00
// Empty reports whether the set contains no buckets.
func (sbs *syncBucketSet) Empty() bool {
	return len(sbs.buckets) == 0
}
2019-11-15 02:27:43 +00:00
// syncTargetBucket collects tipsets that belong to the same chain.
type syncTargetBucket struct {
	tips  []*types.TipSet // distinct tipsets in this bucket
	count int             // number of add() calls, including duplicate adds
}
func (stb *syncTargetBucket) sameChainAs(ts *types.TipSet) bool {
for _, t := range stb.tips {
if ts.Equals(t) {
return true
}
2019-12-16 19:22:56 +00:00
if ts.Key() == t.Parents() {
2019-11-15 02:27:43 +00:00
return true
}
2019-12-16 19:22:56 +00:00
if ts.Parents() == t.Key() {
2019-11-15 02:27:43 +00:00
return true
}
}
return false
}
// add records ts in the bucket unless an equal tipset is already present.
// count is incremented even for duplicates.
func (stb *syncTargetBucket) add(ts *types.TipSet) {
	stb.count++
	for _, existing := range stb.tips {
		if existing.Equals(ts) {
			return
		}
	}
	stb.tips = append(stb.tips, ts)
}
func (stb *syncTargetBucket) heaviestTipSet() *types.TipSet {
2019-11-15 21:35:29 +00:00
if stb == nil {
return nil
}
2019-11-15 02:27:43 +00:00
var best *types.TipSet
for _, ts := range stb.tips {
if best == nil || ts.ParentWeight().GreaterThan(best.ParentWeight()) {
best = ts
}
}
return best
}
2020-09-14 20:58:59 +00:00
// selectSyncTarget picks the bootstrap sync target: it buckets every reported
// peer head by chain and returns the heaviest tipset seen.
// Caller must hold sm.lk (it reads peerHeads; see SetPeerHead).
func (sm *syncManager) selectSyncTarget() (*types.TipSet, error) {
	var buckets syncBucketSet

	var peerHeads []*types.TipSet
	for _, ts := range sm.peerHeads {
		peerHeads = append(peerHeads, ts)
	}
	// Sort by height so bucketing is deterministic despite random map
	// iteration order.
	sort.Slice(peerHeads, func(i, j int) bool {
		return peerHeads[i].Height() < peerHeads[j].Height()
	})

	for _, ts := range peerHeads {
		buckets.Insert(ts)
	}

	if len(buckets.buckets) > 1 {
		log.Warn("caution, multiple distinct chains seen during head selections")
		// TODO: we *could* refuse to sync here without user intervention.
		// For now, just select the best cluster
	}

	return buckets.Heaviest(), nil
}
2020-09-14 20:58:59 +00:00
// syncScheduler is the scheduler event loop. It owns all of the "Sync
// Scheduler fields" of syncManager and multiplexes: incoming candidate
// tipsets, worker results, handing work to idle workers, and shutdown.
//
// The third case relies on the nil-channel trick: workerChan is nil (so the
// send case is disabled) until a target is ready, at which point
// scheduleIncoming/scheduleProcessResult set it to syncTargets.
func (sm *syncManager) syncScheduler() {
	for {
		select {
		case ts, ok := <-sm.incomingTipSets:
			if !ok {
				log.Info("shutting down sync scheduler")
				return
			}
			sm.scheduleIncoming(ts)
		case res := <-sm.syncResults:
			sm.scheduleProcessResult(res)
		case sm.workerChan <- sm.nextSyncTarget.heaviestTipSet():
			sm.scheduleWorkSent()
		case <-sm.stop:
			log.Info("sync scheduler shutting down")
			return
		}
	}
}
2019-11-15 02:27:43 +00:00
2020-09-14 20:58:59 +00:00
func (sm *syncManager) scheduleIncoming(ts *types.TipSet) {
2020-06-09 19:49:31 +00:00
log.Debug("scheduling incoming tipset sync: ", ts.Cids())
2019-12-08 11:44:47 +00:00
if sm.getBootstrapState() == BSStateSelected {
sm.setBootstrapState(BSStateScheduled)
sm.syncTargets <- ts
return
}
2019-11-16 21:36:21 +00:00
var relatedToActiveSync bool
for _, acts := range sm.activeSyncs {
if ts.Equals(acts) {
break
}
2019-11-15 02:27:43 +00:00
2019-12-16 19:22:56 +00:00
if ts.Parents() == acts.Key() {
2019-11-16 21:36:21 +00:00
// sync this next, after that sync process finishes
relatedToActiveSync = true
}
}
2019-11-15 02:27:43 +00:00
if !relatedToActiveSync && sm.activeSyncTips.RelatedToAny(ts) {
relatedToActiveSync = true
}
2019-11-16 21:36:21 +00:00
// if this is related to an active sync process, immediately bucket it
// we don't want to start a parallel sync process that duplicates work
if relatedToActiveSync {
sm.activeSyncTips.Insert(ts)
return
}
2019-12-08 11:44:47 +00:00
if sm.getBootstrapState() == BSStateScheduled {
sm.syncQueue.Insert(ts)
return
}
2019-11-16 21:36:21 +00:00
if sm.nextSyncTarget != nil && sm.nextSyncTarget.sameChainAs(ts) {
sm.nextSyncTarget.add(ts)
} else {
sm.syncQueue.Insert(ts)
if sm.nextSyncTarget == nil {
sm.nextSyncTarget = sm.syncQueue.Pop()
sm.workerChan = sm.syncTargets
}
}
}
2019-11-15 21:35:29 +00:00
2020-09-14 20:58:59 +00:00
// scheduleProcessResult handles a worker's result: it marks bootstrap
// complete on the first success, retires the finished sync from activeSyncs,
// and promotes either the tipsets that were waiting on it (activeSyncTips) or
// the next queued bucket to be the next sync target.
// Runs only on the syncScheduler goroutine.
func (sm *syncManager) scheduleProcessResult(res *syncResult) {
	if res.success && sm.getBootstrapState() != BSStateComplete {
		sm.setBootstrapState(BSStateComplete)
	}

	delete(sm.activeSyncs, res.ts.Key())
	relbucket := sm.activeSyncTips.PopRelated(res.ts)
	if relbucket != nil {
		if res.success {
			if sm.nextSyncTarget == nil {
				// Hand the waiting bucket to the next idle worker.
				sm.nextSyncTarget = relbucket
				sm.workerChan = sm.syncTargets
			} else {
				sm.syncQueue.buckets = append(sm.syncQueue.buckets, relbucket)
			}
			return
		}
		// TODO: this is the case where we try to sync a chain, and
		// fail, and we have more blocks on top of that chain that
		// have come in since. The question is, should we try to
		// sync these? or just drop them?
		log.Error("failed to sync chain but have new unconnected blocks from chain")
	}

	if sm.nextSyncTarget == nil && !sm.syncQueue.Empty() {
		next := sm.syncQueue.Pop()
		if next != nil {
			sm.nextSyncTarget = next
			sm.workerChan = sm.syncTargets
		}
	}
}
2020-09-14 20:58:59 +00:00
// scheduleWorkSent runs right after nextSyncTarget's heaviest tipset has been
// handed to a worker: it records that tipset as active and advances
// nextSyncTarget from the queue, or disables the worker-send select case by
// nilling workerChan. Runs only on the syncScheduler goroutine.
func (sm *syncManager) scheduleWorkSent() {
	hts := sm.nextSyncTarget.heaviestTipSet()
	sm.activeSyncs[hts.Key()] = hts

	if !sm.syncQueue.Empty() {
		sm.nextSyncTarget = sm.syncQueue.Pop()
	} else {
		sm.nextSyncTarget = nil
		sm.workerChan = nil
	}
}
2020-09-14 20:58:59 +00:00
// syncWorker is one of syncWorkerCount worker goroutines: it receives targets
// from syncTargets, runs doSync on each, and reports the outcome on
// syncResults.
// NOTE(review): the loop exits only when syncTargets is closed; Stop() closes
// sm.stop instead, so workers are not stopped by Stop — confirm intended.
func (sm *syncManager) syncWorker(id int) {
	ss := &SyncerState{}
	// NOTE(review): written here while State() reads the slice without
	// synchronization — confirm this is race-free.
	sm.syncStates[id] = ss
	for {
		select {
		case ts, ok := <-sm.syncTargets:
			if !ok {
				log.Info("sync manager worker shutting down")
				return
			}
			// Expose this worker's state to the sync via the context.
			ctx := context.WithValue(context.TODO(), syncStateKey{}, ss)
			err := sm.doSync(ctx, ts)
			if err != nil {
				log.Errorf("sync error: %+v", err)
			}

			sm.syncResults <- &syncResult{
				ts:      ts,
				success: err == nil,
			}
		}
	}
}
2020-09-14 20:58:59 +00:00
// syncedPeerCount returns how many peers have reported a head above height 0.
// Caller must hold sm.lk (it reads peerHeads; see SetPeerHead).
func (sm *syncManager) syncedPeerCount() int {
	count := 0
	for _, head := range sm.peerHeads {
		if head.Height() > 0 {
			count++
		}
	}
	return count
}
2020-09-14 20:58:59 +00:00
// getBootstrapState returns the current bootstrap state (a BSState* constant)
// under bssLk.
func (sm *syncManager) getBootstrapState() int {
	sm.bssLk.Lock()
	defer sm.bssLk.Unlock()
	return sm.bootstrapState
}
2020-09-14 20:58:59 +00:00
// setBootstrapState updates the bootstrap state (a BSState* constant) under
// bssLk.
func (sm *syncManager) setBootstrapState(v int) {
	sm.bssLk.Lock()
	defer sm.bssLk.Unlock()
	sm.bootstrapState = v
}
2020-09-14 20:58:59 +00:00
func (sm *syncManager) IsBootstrapped() bool {
2019-12-08 11:44:47 +00:00
sm.bssLk.Lock()
defer sm.bssLk.Unlock()
return sm.bootstrapState == BSStateComplete
}