sched: Fix worker reenabling

Łukasz Magiera 2020-10-30 18:01:37 +01:00
parent af1d45d969
commit 774e2ecebf
3 changed files with 78 additions and 16 deletions
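
What changed, as read from the hunks below: the scheduler's per-worker loop previously re-enabled a worker only inside an "if !enabled" branch driven by a locally cached copy of worker.enabled, so a worker whose session check came back healthy could be left disabled when that cached read was stale. The fix scopes the cached read to the window-request step and marks the worker enabled unconditionally after a successful session check. A test-only testDisable flag on LocalWorker makes Session fail on demand, and TestReenableWorker drives a full disable/reenable cycle against it.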

@@ -10,6 +10,7 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
@@ -376,3 +377,59 @@ func TestRestartWorker(t *testing.T) {
 	require.NoError(t, err)
 	require.Empty(t, uf)
 }
+
+func TestReenableWorker(t *testing.T) {
+	logging.SetAllLoggers(logging.LevelDebug)
+	stores.HeartbeatInterval = 5 * time.Millisecond
+
+	ctx, done := context.WithCancel(context.Background())
+	defer done()
+
+	ds := datastore.NewMapDatastore()
+
+	m, lstor, stor, idx, cleanup := newTestMgr(ctx, t, ds)
+	defer cleanup()
+
+	localTasks := []sealtasks.TaskType{
+		sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch,
+	}
+
+	wds := datastore.NewMapDatastore()
+
+	arch := make(chan chan apres)
+	w := newLocalWorker(func() (ffiwrapper.Storage, error) {
+		return &testExec{apch: arch}, nil
+	}, WorkerConfig{
+		SealProof: 0,
+		TaskTypes: localTasks,
+	}, stor, lstor, idx, m, statestore.New(wds))
+
+	err := m.AddWorker(ctx, w)
+	require.NoError(t, err)
+
+	time.Sleep(time.Millisecond * 100)
+
+	// disable
+	atomic.StoreInt64(&w.testDisable, 1)
+
+	for i := 0; i < 100; i++ {
+		if !m.WorkerStats()[w.session].Enabled {
+			break
+		}
+
+		time.Sleep(time.Millisecond * 3)
+	}
+	require.False(t, m.WorkerStats()[w.session].Enabled)
+
+	// reenable
+	atomic.StoreInt64(&w.testDisable, 0)
+
+	for i := 0; i < 100; i++ {
+		if m.WorkerStats()[w.session].Enabled {
+			break
+		}
+
+		time.Sleep(time.Millisecond * 3)
+	}
+	require.True(t, m.WorkerStats()[w.session].Enabled)
+}
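
Note: the two poll loops above retry up to 100 times at 3ms intervals instead of sleeping a fixed amount, so the test finishes as soon as the scheduler observes the state change. A minimal standalone sketch of that pattern (the waitFor helper below is illustrative, not part of the lotus codebase):

package main

import (
	"fmt"
	"time"
)

// waitFor polls cond every interval, up to attempts times, and reports
// whether cond returned true before the attempts ran out.
func waitFor(cond func() bool, attempts int, interval time.Duration) bool {
	for i := 0; i < attempts; i++ {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return false
}

func main() {
	// Simulate "wait until the worker shows up as disabled" with a timer.
	deadline := time.Now().Add(10 * time.Millisecond)
	fmt.Println("met:", waitFor(func() bool { return time.Now().After(deadline) }, 100, 3*time.Millisecond))
}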

@@ -104,14 +104,16 @@ func (sw *schedWorker) handleWorker() {
 	defer sw.heartbeatTimer.Stop()

 	for {
-		sched.workersLk.Lock()
-		enabled := worker.enabled
-		sched.workersLk.Unlock()
+		{
+			sched.workersLk.Lock()
+			enabled := worker.enabled
+			sched.workersLk.Unlock()

-		// ask for more windows if we need them (non-blocking)
-		if enabled {
-			if !sw.requestWindows() {
-				return // graceful shutdown
+			// ask for more windows if we need them (non-blocking)
+			if enabled {
+				if !sw.requestWindows() {
+					return // graceful shutdown
+				}
 			}
 		}

@@ -123,13 +125,10 @@ func (sw *schedWorker) handleWorker() {
 		}

 		// session looks good
-		if !enabled {
-			sched.workersLk.Lock()
-			worker.enabled = true
-			sched.workersLk.Unlock()
-
-			// we'll send window requests on the next loop
-		}
+		sched.workersLk.Lock()
+		worker.enabled = true
+		// we'll send window requests on the next loop
+		sched.workersLk.Unlock()

 		// wait for more tasks to be assigned by the main scheduler or for the worker
 		// to finish processing a task
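
The effect of these two hunks, sketched as a standalone program (the worker type below is a stand-in for the scheduler state, not lotus code): the locked read of the enabled flag is confined to its own block so it cannot be consulted later when stale, and after a healthy session check the flag is set unconditionally rather than only when the earlier cached read happened to see it false.

package main

import (
	"fmt"
	"sync"
)

type worker struct {
	lk      sync.Mutex
	enabled bool
}

// loopOnce models one iteration of the per-worker scheduler loop.
func (w *worker) loopOnce(sessionOK bool) {
	{
		w.lk.Lock()
		enabled := w.enabled
		w.lk.Unlock()

		if enabled {
			// window requests would be sent here (non-blocking)
		}
	} // the cached read cannot leak past this block

	if !sessionOK {
		return // session check failed; the worker stays disabled
	}

	// session looks good: re-enable unconditionally, not only when the
	// earlier cached read saw the worker disabled
	w.lk.Lock()
	w.enabled = true
	w.lk.Unlock()
}

func main() {
	w := &worker{}
	w.loopOnce(true)
	fmt.Println("enabled after healthy session:", w.enabled)
}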

@@ -8,6 +8,7 @@ import (
 	"reflect"
 	"runtime"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/elastic/go-sysinfo"
@@ -51,8 +52,9 @@ type LocalWorker struct {
 	acceptTasks map[sealtasks.TaskType]struct{}
 	running     sync.WaitGroup

 	session     uuid.UUID
+	testDisable int64
 	closing     chan struct{}
 }

 func newLocalWorker(executor ExecutorFunc, wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker {
@@ -501,6 +503,10 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
 }

 func (l *LocalWorker) Session(ctx context.Context) (uuid.UUID, error) {
+	if atomic.LoadInt64(&l.testDisable) == 1 {
+		return uuid.UUID{}, xerrors.Errorf("disabled")
+	}
+
 	select {
 	case <-l.closing:
 		return ClosedWorkerID, nil
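
Session is the liveness probe the scheduler runs against each worker, so failing it here makes a disabled worker look unreachable. A sketch of how a caller might consume it (the reduced interface and fakeWorker below are illustrative assumptions, not lotus code):

package main

import (
	"context"
	"fmt"

	"github.com/google/uuid"
)

type sessionChecker interface {
	Session(context.Context) (uuid.UUID, error)
}

// alive reports whether the worker still answers with the session ID first
// observed; an error (like the test-only "disabled" one above) or a changed
// ID both mean the worker should be treated as gone or restarted.
func alive(ctx context.Context, w sessionChecker, expect uuid.UUID) bool {
	cur, err := w.Session(ctx)
	if err != nil {
		return false
	}
	return cur == expect
}

type fakeWorker struct{ id uuid.UUID }

func (f *fakeWorker) Session(context.Context) (uuid.UUID, error) { return f.id, nil }

func main() {
	id := uuid.New()
	fmt.Println("alive:", alive(context.Background(), &fakeWorker{id: id}, id))
}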