fix a race in the sync manager

1. SyncerState contains a mutex and should never be copied. Honestly, this case
was probably fine but it's just as easy to create a separate snapshot type and
easier to reason about.

2. We need to initialize the syncStates array once at start, before accessing
it. By each syncer state inside each worker, we were racing with calls to
`State()`. Again, this was probably benign, but I don't trust optimizing
compilers.
This commit is contained in:
Steven Allen 2020-10-10 08:31:04 -07:00
parent 58f13a7d5a
commit 7245ac2b69
3 changed files with 31 additions and 32 deletions

View File

@ -1726,7 +1726,7 @@ func VerifyElectionPoStVRF(ctx context.Context, worker address.Address, rand []b
return gen.VerifyVRF(ctx, worker, rand, evrf)
}
func (syncer *Syncer) State() []SyncerState {
func (syncer *Syncer) State() []SyncerStateSnapshot {
return syncer.syncmgr.State()
}

View File

@ -38,7 +38,7 @@ type SyncManager interface {
SetPeerHead(ctx context.Context, p peer.ID, ts *types.TipSet)
// State retrieves the state of the sync workers.
State() []SyncerState
State() []SyncerStateSnapshot
}
type syncManager struct {
@ -79,7 +79,7 @@ type syncResult struct {
const syncWorkerCount = 3
func NewSyncManager(sync SyncFunc) SyncManager {
return &syncManager{
sm := &syncManager{
bspThresh: 1,
peerHeads: make(map[peer.ID]*types.TipSet),
syncTargets: make(chan *types.TipSet),
@ -90,6 +90,10 @@ func NewSyncManager(sync SyncFunc) SyncManager {
doSync: sync,
stop: make(chan struct{}),
}
for i := range sm.syncStates {
sm.syncStates[i] = new(SyncerState)
}
return sm
}
func (sm *syncManager) Start() {
@ -128,8 +132,8 @@ func (sm *syncManager) SetPeerHead(ctx context.Context, p peer.ID, ts *types.Tip
sm.incomingTipSets <- ts
}
func (sm *syncManager) State() []SyncerState {
ret := make([]SyncerState, 0, len(sm.syncStates))
func (sm *syncManager) State() []SyncerStateSnapshot {
ret := make([]SyncerStateSnapshot, 0, len(sm.syncStates))
for _, s := range sm.syncStates {
ret = append(ret, s.Snapshot())
}
@ -405,8 +409,7 @@ func (sm *syncManager) scheduleWorkSent() {
}
func (sm *syncManager) syncWorker(id int) {
ss := &SyncerState{}
sm.syncStates[id] = ss
ss := sm.syncStates[id]
for {
select {
case ts, ok := <-sm.syncTargets:

View File

@ -11,8 +11,7 @@ import (
"github.com/filecoin-project/lotus/chain/types"
)
type SyncerState struct {
lk sync.Mutex
type SyncerStateSnapshot struct {
Target *types.TipSet
Base *types.TipSet
Stage api.SyncStateStage
@ -22,6 +21,11 @@ type SyncerState struct {
End time.Time
}
type SyncerState struct {
lk sync.Mutex
data SyncerStateSnapshot
}
func (ss *SyncerState) SetStage(v api.SyncStateStage) {
if ss == nil {
return
@ -29,9 +33,9 @@ func (ss *SyncerState) SetStage(v api.SyncStateStage) {
ss.lk.Lock()
defer ss.lk.Unlock()
ss.Stage = v
ss.data.Stage = v
if v == api.StageSyncComplete {
ss.End = build.Clock.Now()
ss.data.End = build.Clock.Now()
}
}
@ -42,13 +46,13 @@ func (ss *SyncerState) Init(base, target *types.TipSet) {
ss.lk.Lock()
defer ss.lk.Unlock()
ss.Target = target
ss.Base = base
ss.Stage = api.StageHeaders
ss.Height = 0
ss.Message = ""
ss.Start = build.Clock.Now()
ss.End = time.Time{}
ss.data.Target = target
ss.data.Base = base
ss.data.Stage = api.StageHeaders
ss.data.Height = 0
ss.data.Message = ""
ss.data.Start = build.Clock.Now()
ss.data.End = time.Time{}
}
func (ss *SyncerState) SetHeight(h abi.ChainEpoch) {
@ -58,7 +62,7 @@ func (ss *SyncerState) SetHeight(h abi.ChainEpoch) {
ss.lk.Lock()
defer ss.lk.Unlock()
ss.Height = h
ss.data.Height = h
}
func (ss *SyncerState) Error(err error) {
@ -68,21 +72,13 @@ func (ss *SyncerState) Error(err error) {
ss.lk.Lock()
defer ss.lk.Unlock()
ss.Message = err.Error()
ss.Stage = api.StageSyncErrored
ss.End = build.Clock.Now()
ss.data.Message = err.Error()
ss.data.Stage = api.StageSyncErrored
ss.data.End = build.Clock.Now()
}
func (ss *SyncerState) Snapshot() SyncerState {
func (ss *SyncerState) Snapshot() SyncerStateSnapshot {
ss.lk.Lock()
defer ss.lk.Unlock()
return SyncerState{
Base: ss.Base,
Target: ss.Target,
Stage: ss.Stage,
Height: ss.Height,
Message: ss.Message,
Start: ss.Start,
End: ss.End,
}
return ss.data
}