fix a race in the sync manager

1. SyncerState contains a mutex and should never be copied. Honestly, this case
was probably fine, but it's just as easy to create a separate snapshot type,
and that's easier to reason about (see the sketch after this list).

2. We need to initialize the syncStates array once at start, before accessing
it. By initializing each syncer state inside each worker, we were racing with
calls to `State()`. Again, this was probably benign, but I don't trust
optimizing compilers (a minimal reproduction follows the sync manager diff below).
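
To make point 1 concrete, here is a minimal sketch of the pattern (illustrative State/Snapshot names, not the actual Lotus types): the mutex lives only in the mutable type, and the snapshot type is plain data, so returning it by value is an ordinary, safe copy.

package main

import (
	"fmt"
	"sync"
)

// Snapshot is plain data; copying it copies no locks.
type Snapshot struct {
	Stage  string
	Height int
}

// State owns the mutex and is never copied; only it touches the guarded data.
type State struct {
	lk   sync.Mutex
	data Snapshot
}

func (s *State) SetHeight(h int) {
	s.lk.Lock()
	defer s.lk.Unlock()
	s.data.Height = h
}

// Snapshot copies the guarded data out under the lock, so callers never
// hold or copy the mutex itself.
func (s *State) Snapshot() Snapshot {
	s.lk.Lock()
	defer s.lk.Unlock()
	return s.data
}

func main() {
	st := &State{}
	st.SetHeight(42)
	fmt.Println(st.Snapshot().Height) // prints 42
}

The diff below applies the same split as SyncerState/SyncerStateSnapshot.
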
Steven Allen 2020-10-10 08:31:04 -07:00
parent 58f13a7d5a
commit 7245ac2b69
3 changed files with 31 additions and 32 deletions

@@ -1726,7 +1726,7 @@ func VerifyElectionPoStVRF(ctx context.Context, worker address.Address, rand []b
 	return gen.VerifyVRF(ctx, worker, rand, evrf)
 }
 
-func (syncer *Syncer) State() []SyncerState {
+func (syncer *Syncer) State() []SyncerStateSnapshot {
 	return syncer.syncmgr.State()
 }

@@ -38,7 +38,7 @@ type SyncManager interface {
 	SetPeerHead(ctx context.Context, p peer.ID, ts *types.TipSet)
 
 	// State retrieves the state of the sync workers.
-	State() []SyncerState
+	State() []SyncerStateSnapshot
 }
 
 type syncManager struct {
@@ -79,7 +79,7 @@ type syncResult struct {
 const syncWorkerCount = 3
 
 func NewSyncManager(sync SyncFunc) SyncManager {
-	return &syncManager{
+	sm := &syncManager{
 		bspThresh: 1,
 		peerHeads: make(map[peer.ID]*types.TipSet),
 		syncTargets: make(chan *types.TipSet),
@@ -90,6 +90,10 @@ func NewSyncManager(sync SyncFunc) SyncManager {
 		doSync: sync,
 		stop: make(chan struct{}),
 	}
+	for i := range sm.syncStates {
+		sm.syncStates[i] = new(SyncerState)
+	}
+	return sm
 }
 
 func (sm *syncManager) Start() {
@@ -128,8 +132,8 @@ func (sm *syncManager) SetPeerHead(ctx context.Context, p peer.ID, ts *types.Tip
 	sm.incomingTipSets <- ts
 }
 
-func (sm *syncManager) State() []SyncerState {
-	ret := make([]SyncerState, 0, len(sm.syncStates))
+func (sm *syncManager) State() []SyncerStateSnapshot {
+	ret := make([]SyncerStateSnapshot, 0, len(sm.syncStates))
 	for _, s := range sm.syncStates {
 		ret = append(ret, s.Snapshot())
 	}
@@ -405,8 +409,7 @@ func (sm *syncManager) scheduleWorkSent() {
 }
 
 func (sm *syncManager) syncWorker(id int) {
-	ss := &SyncerState{}
-	sm.syncStates[id] = ss
+	ss := sm.syncStates[id]
 	for {
 		select {
 		case ts, ok := <-sm.syncTargets:

@@ -11,8 +11,7 @@ import (
 	"github.com/filecoin-project/lotus/chain/types"
 )
 
-type SyncerState struct {
-	lk sync.Mutex
+type SyncerStateSnapshot struct {
 	Target *types.TipSet
 	Base *types.TipSet
 	Stage api.SyncStateStage
@@ -22,6 +21,11 @@ type SyncerState struct {
 	End time.Time
 }
 
+type SyncerState struct {
+	lk   sync.Mutex
+	data SyncerStateSnapshot
+}
+
 func (ss *SyncerState) SetStage(v api.SyncStateStage) {
 	if ss == nil {
 		return
@@ -29,9 +33,9 @@ func (ss *SyncerState) SetStage(v api.SyncStateStage) {
 	ss.lk.Lock()
 	defer ss.lk.Unlock()
-	ss.Stage = v
+	ss.data.Stage = v
 	if v == api.StageSyncComplete {
-		ss.End = build.Clock.Now()
+		ss.data.End = build.Clock.Now()
 	}
 }
@@ -42,13 +46,13 @@ func (ss *SyncerState) Init(base, target *types.TipSet) {
 	ss.lk.Lock()
 	defer ss.lk.Unlock()
-	ss.Target = target
-	ss.Base = base
-	ss.Stage = api.StageHeaders
-	ss.Height = 0
-	ss.Message = ""
-	ss.Start = build.Clock.Now()
-	ss.End = time.Time{}
+	ss.data.Target = target
+	ss.data.Base = base
+	ss.data.Stage = api.StageHeaders
+	ss.data.Height = 0
+	ss.data.Message = ""
+	ss.data.Start = build.Clock.Now()
+	ss.data.End = time.Time{}
 }
@@ -58,7 +62,7 @@ func (ss *SyncerState) SetHeight(h abi.ChainEpoch) {
 	ss.lk.Lock()
 	defer ss.lk.Unlock()
-	ss.Height = h
+	ss.data.Height = h
 }
@@ -68,21 +72,13 @@ func (ss *SyncerState) Error(err error) {
 	ss.lk.Lock()
 	defer ss.lk.Unlock()
-	ss.Message = err.Error()
-	ss.Stage = api.StageSyncErrored
-	ss.End = build.Clock.Now()
+	ss.data.Message = err.Error()
+	ss.data.Stage = api.StageSyncErrored
+	ss.data.End = build.Clock.Now()
 }
 
-func (ss *SyncerState) Snapshot() SyncerState {
+func (ss *SyncerState) Snapshot() SyncerStateSnapshot {
 	ss.lk.Lock()
 	defer ss.lk.Unlock()
-	return SyncerState{
-		Base: ss.Base,
-		Target: ss.Target,
-		Stage: ss.Stage,
-		Height: ss.Height,
-		Message: ss.Message,
-		Start: ss.Start,
-		End: ss.End,
-	}
+	return ss.data
 }
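
As a follow-up on why the split is "easier to reason about": copying a struct that contains a sync.Mutex is exactly what go vet's copylocks check is meant to catch. A standalone sketch (illustrative type, not the Lotus one) of the kind of caller-side copy the old []SyncerState return value invited, which vet flags with a "range var ... copies lock" diagnostic:

package main

import (
	"fmt"
	"sync"
)

type S struct {
	lk     sync.Mutex
	Height int
}

func main() {
	states := make([]S, 3)
	for _, s := range states { // go vet: range var s copies lock
		fmt.Println(s.Height)
	}
}

Returning SyncerStateSnapshot values instead means callers copy plain data, with nothing for vet to complain about.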