sched: WIP Windows

commit ac7dc28cfb
parent a445979f1a

sched.go (+436 -436)
@@ -3,11 +3,11 @@ package sectorstorage
 import (
 	"container/heap"
 	"context"
+	"math/rand"
 	"sort"
 	"sync"
 	"time"
 
-	"github.com/hashicorp/go-multierror"
 	"golang.org/x/xerrors"
 
 	"github.com/filecoin-project/specs-actors/actors/abi"
@@ -20,6 +20,11 @@ type schedPrioCtxKey int
 
 var SchedPriorityKey schedPrioCtxKey
 var DefaultSchedPriority = 0
+var SelectorTimeout = 5 * time.Second
+
+var (
+	SchedWindows = 2
+)
 
 func getPriority(ctx context.Context) int {
 	sp := ctx.Value(SchedPriorityKey)
@@ -56,11 +61,63 @@ type scheduler struct {
 	watchClosing  chan WorkerID
 	workerClosing chan WorkerID
 
 	schedule chan *workerRequest
-	workerFree chan WorkerID
-	closing chan struct{}
-
-	schedQueue *requestQueue
+	windowRequests chan *schedWindowRequest
+
+	// owned by the sh.runSched goroutine
+	schedQueue  *requestQueue
+	openWindows []*schedWindowRequest
+
+	closing chan struct{}
+}
+
+type workerHandle struct {
+	w Worker
+
+	info storiface.WorkerInfo
+
+	preparing *activeResources
+	active    *activeResources
+}
+
+type schedWindowRequest struct {
+	worker WorkerID
+
+	done chan *schedWindow
+}
+
+type schedWindow struct {
+	worker    WorkerID
+	allocated *activeResources
+	todo      []*workerRequest
+}
+
+type activeResources struct {
+	memUsedMin uint64
+	memUsedMax uint64
+	gpuUsed    bool
+	cpuUse     uint64
+
+	cond *sync.Cond
+}
+
+type workerRequest struct {
+	sector   abi.SectorID
+	taskType sealtasks.TaskType
+	priority int // larger values more important
+	sel      WorkerSelector
+
+	prepare WorkerAction
+	work    WorkerAction
+
+	index int // The index of the item in the heap.
+
+	ret chan<- workerResponse
+	ctx context.Context
+}
+
+type workerResponse struct {
+	err error
 }
 
 func newScheduler(spt abi.RegisteredSealProof) *scheduler {
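
The new types above define the handshake this commit is moving toward: a worker keeps window requests open with the central scheduler loop via windowRequests, and the scheduler answers on the request's done channel with a schedWindow holding the tasks assigned to that worker. The runWorker function added later in this diff is still a stub, so the following is only a rough sketch, not part of the commit, of how such a per-worker loop could consume these types, assuming the fields and constants introduced above (SchedWindows, schedWindowRequest, schedWindow, sh.windowRequests, sh.closing). The helper name runWorkerSketch is invented for illustration.

// Sketch only (not in this commit): a per-worker loop that keeps a window
// request open with runSched and drains the windows it is handed back.
func (sh *scheduler) runWorkerSketch(wid WorkerID) {
	go func() {
		// Buffered, so the scheduler's non-blocking send in trySched "Step 3"
		// can always deliver a window (it logs an error otherwise).
		done := make(chan *schedWindow, SchedWindows)

		for {
			// Ask the central loop for more work.
			select {
			case sh.windowRequests <- &schedWindowRequest{worker: wid, done: done}:
			case <-sh.closing:
				return
			}

			// Wait for the scheduler to fill and return a window, then run its tasks.
			select {
			case window := <-done:
				for _, req := range window.todo {
					_ = req // running req.prepare/req.work and responding on req.ret is omitted here
				}
			case <-sh.closing:
				return
			}
		}
	}()
}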
@@ -75,9 +132,8 @@ func newScheduler(spt abi.RegisteredSealProof) *scheduler {
 		watchClosing:  make(chan WorkerID),
 		workerClosing: make(chan WorkerID),
 
 		schedule: make(chan *workerRequest),
-		workerFree: make(chan WorkerID),
-		closing: make(chan struct{}),
+		closing: make(chan struct{}),
 
 		schedQueue: &requestQueue{},
 	}
@@ -115,25 +171,6 @@ func (sh *scheduler) Schedule(ctx context.Context, sector abi.SectorID, taskType
 	}
 }
 
-type workerRequest struct {
-	sector   abi.SectorID
-	taskType sealtasks.TaskType
-	priority int // larger values more important
-	sel      WorkerSelector
-
-	prepare WorkerAction
-	work    WorkerAction
-
-	index int // The index of the item in the heap.
-
-	ret chan<- workerResponse
-	ctx context.Context
-}
-
-type workerResponse struct {
-	err error
-}
-
 func (r *workerRequest) respond(err error) {
 	select {
 	case r.ret <- workerResponse{err: err}:
@@ -142,46 +179,25 @@ func (r *workerRequest) respond(err error) {
 	}
 }
 
-type activeResources struct {
-	memUsedMin uint64
-	memUsedMax uint64
-	gpuUsed    bool
-	cpuUse     uint64
-
-	cond *sync.Cond
-}
-
-type workerHandle struct {
-	w Worker
-
-	info storiface.WorkerInfo
-
-	preparing *activeResources
-	active    *activeResources
-}
-
 func (sh *scheduler) runSched() {
 	go sh.runWorkerWatcher()
 
 	for {
 		select {
 		case w := <-sh.newWorkers:
-			sh.schedNewWorker(w)
-		case wid := <-sh.workerClosing:
-			sh.schedDropWorker(wid)
-		case req := <-sh.schedule:
-			scheduled, err := sh.maybeSchedRequest(req)
-			if err != nil {
-				req.respond(err)
-				continue
-			}
-			if scheduled {
-				continue
-			}
-
+			sh.newWorker(w)
+
+		case wid := <-sh.workerClosing:
+			sh.dropWorker(wid)
+
+		case req := <-sh.schedule:
 			heap.Push(sh.schedQueue, req)
-		case wid := <-sh.workerFree:
-			sh.onWorkerFreed(wid)
+			sh.trySched()
+
+		case req := <-sh.windowRequests:
+			sh.openWindows = append(sh.openWindows, req)
+			sh.trySched()
+
 		case <-sh.closing:
 			sh.schedClose()
 			return
@@ -189,169 +205,161 @@ func (sh *scheduler) runSched() {
 	}
 }
 
-func (sh *scheduler) onWorkerFreed(wid WorkerID) {
-	sh.workersLk.Lock()
-	w, ok := sh.workers[wid]
-	sh.workersLk.Unlock()
-	if !ok {
-		log.Warnf("onWorkerFreed on invalid worker %d", wid)
-		return
-	}
-
-	for i := 0; i < sh.schedQueue.Len(); i++ {
-		req := (*sh.schedQueue)[i]
-
-		ok, err := req.sel.Ok(req.ctx, req.taskType, sh.spt, w)
-		if err != nil {
-			log.Errorf("onWorkerFreed req.sel.Ok error: %+v", err)
-			continue
-		}
-
-		if !ok {
-			continue
-		}
-
-		scheduled, err := sh.maybeSchedRequest(req)
-		if err != nil {
-			req.respond(err)
-			continue
-		}
-
-		if scheduled {
-			heap.Remove(sh.schedQueue, i)
-			i--
-			continue
-		}
-	}
-}
-
-var selectorTimeout = 5 * time.Second
-
-func (sh *scheduler) maybeSchedRequest(req *workerRequest) (bool, error) {
-	sh.workersLk.Lock()
-	defer sh.workersLk.Unlock()
-
-	tried := 0
-	var acceptable []WorkerID
-
-	needRes := ResourceTable[req.taskType][sh.spt]
-
-	for wid, worker := range sh.workers {
-		rpcCtx, cancel := context.WithTimeout(req.ctx, selectorTimeout)
-		ok, err := req.sel.Ok(rpcCtx, req.taskType, sh.spt, worker)
-		cancel()
-
-		if err != nil {
-			return false, err
-		}
-
-		if !ok {
-			continue
-		}
-		tried++
-
-		if !canHandleRequest(needRes, wid, worker.info.Resources, worker.preparing) {
-			continue
-		}
-
-		acceptable = append(acceptable, wid)
-	}
-
-	if len(acceptable) > 0 {
-		{
-			var serr error
-
-			sort.SliceStable(acceptable, func(i, j int) bool {
-				rpcCtx, cancel := context.WithTimeout(req.ctx, selectorTimeout)
-				defer cancel()
-				r, err := req.sel.Cmp(rpcCtx, req.taskType, sh.workers[acceptable[i]], sh.workers[acceptable[j]])
-
-				if err != nil {
-					serr = multierror.Append(serr, err)
-				}
-				return r
-			})
-
-			if serr != nil {
-				return false, xerrors.Errorf("error(s) selecting best worker: %w", serr)
-			}
-		}
-
-		return true, sh.assignWorker(acceptable[0], sh.workers[acceptable[0]], req)
-	}
-
-	if tried == 0 {
-		return false, xerrors.New("maybeSchedRequest didn't find any good workers")
-	}
-
-	return false, nil // put in waiting queue
-}
-
-func (sh *scheduler) assignWorker(wid WorkerID, w *workerHandle, req *workerRequest) error {
-	needRes := ResourceTable[req.taskType][sh.spt]
-
-	w.preparing.add(w.info.Resources, needRes)
-
-	go func() {
-		err := req.prepare(req.ctx, w.w)
-		sh.workersLk.Lock()
-
-		if err != nil {
-			w.preparing.free(w.info.Resources, needRes)
-			sh.workersLk.Unlock()
-
-			select {
-			case sh.workerFree <- wid:
-			case <-sh.closing:
-				log.Warnf("scheduler closed while sending response (prepare error: %+v)", err)
-			}
-
-			select {
-			case req.ret <- workerResponse{err: err}:
-			case <-req.ctx.Done():
-				log.Warnf("request got cancelled before we could respond (prepare error: %+v)", err)
-			case <-sh.closing:
-				log.Warnf("scheduler closed while sending response (prepare error: %+v)", err)
-			}
-			return
-		}
-
-		err = w.active.withResources(wid, w.info.Resources, needRes, &sh.workersLk, func() error {
-			w.preparing.free(w.info.Resources, needRes)
-			sh.workersLk.Unlock()
-			defer sh.workersLk.Lock() // we MUST return locked from this function
-
-			select {
-			case sh.workerFree <- wid:
-			case <-sh.closing:
-			}
-
-			err = req.work(req.ctx, w.w)
-
-			select {
-			case req.ret <- workerResponse{err: err}:
-			case <-req.ctx.Done():
-				log.Warnf("request got cancelled before we could respond")
-			case <-sh.closing:
-				log.Warnf("scheduler closed while sending response")
-			}
-
-			return nil
-		})
-
-		sh.workersLk.Unlock()
-
-		// This error should always be nil, since nothing is setting it, but just to be safe:
-		if err != nil {
-			log.Errorf("error executing worker (withResources): %+v", err)
-		}
-	}()
-
-	return nil
-}
+func (sh *scheduler) trySched() {
+	/*
+		This assigns tasks to workers based on:
+		- Task priority (achieved by handling sh.schedQueue in order, since it's already sorted by priority)
+		- Worker resource availability
+		- Task-specified worker preference (acceptableWindows array below sorted by this preference)
+		- Window request age
+
+		1. For each task in the schedQueue find windows which can handle them
+		1.1. Create list of windows capable of handling a task
+		1.2. Sort windows according to task selector preferences
+		2. Going through schedQueue again, assign task to first acceptable window
+		   with resources available
+		3. Submit windows with scheduled tasks to workers
+	*/
+
+	windows := make([]schedWindow, len(sh.openWindows))
+	acceptableWindows := make([][]int, sh.schedQueue.Len())
+
+	// Step 1
+	for sqi := 0; sqi < sh.schedQueue.Len(); sqi++ {
+		task := (*sh.schedQueue)[sqi]
+		needRes := ResourceTable[task.taskType][sh.spt]
+
+		for wnd, windowRequest := range sh.openWindows {
+			worker := sh.workers[windowRequest.worker]
+
+			// TODO: allow bigger windows
+			if !windows[wnd].allocated.canHandleRequest(needRes, windowRequest.worker, worker.info.Resources) {
+				continue
+			}
+
+			ok, err := task.sel.Ok(task.ctx, task.taskType, sh.spt, worker)
+			if err != nil {
+				log.Errorf("trySched(1) req.sel.Ok error: %+v", err)
+				continue
+			}
+
+			if !ok {
+				continue
+			}
+
+			acceptableWindows[sqi] = append(acceptableWindows[sqi], wnd)
+		}
+
+		if len(acceptableWindows[sqi]) == 0 {
+			continue
+		}
+
+		// Pick best worker (shuffle in case some workers are equally as good)
+		rand.Shuffle(len(acceptableWindows[sqi]), func(i, j int) {
+			acceptableWindows[sqi][i], acceptableWindows[sqi][j] = acceptableWindows[sqi][j], acceptableWindows[sqi][i]
+		})
+		sort.SliceStable(acceptableWindows, func(i, j int) bool {
+			wii := sh.openWindows[acceptableWindows[sqi][i]].worker
+			wji := sh.openWindows[acceptableWindows[sqi][j]].worker
+
+			if wii == wji {
+				// for the same worker prefer older windows
+				return acceptableWindows[sqi][i] < acceptableWindows[sqi][j]
+			}
+
+			wi := sh.workers[wii]
+			wj := sh.workers[wji]
+
+			rpcCtx, cancel := context.WithTimeout(task.ctx, SelectorTimeout)
+			defer cancel()
+
+			r, err := task.sel.Cmp(rpcCtx, task.taskType, wi, wj)
+			if err != nil {
+				log.Error("selecting best worker: %s", err)
+			}
+			return r
+		})
+	}
+
+	// Step 2
+	scheduled := 0
+
+	for sqi := 0; sqi < sh.schedQueue.Len(); sqi++ {
+		task := (*sh.schedQueue)[sqi]
+		needRes := ResourceTable[task.taskType][sh.spt]
+
+		selectedWindow := -1
+		for _, wnd := range acceptableWindows[sqi+scheduled] {
+			wid := sh.openWindows[wnd].worker
+			wr := sh.workers[wid].info.Resources
+
+			// TODO: allow bigger windows
+			if windows[wnd].allocated.canHandleRequest(needRes, wid, wr) {
+				continue
+			}
+
+			windows[wnd].allocated.add(wr, needRes)
+
+			selectedWindow = wnd
+			break
+		}
+
+		windows[selectedWindow].todo = append(windows[selectedWindow].todo, task)
+
+		heap.Remove(sh.schedQueue, sqi)
+		sqi--
+		scheduled++
+	}
+
+	// Step 3
+
+	if scheduled == 0 {
+		return
+	}
+
+	scheduledWindows := map[int]struct{}{}
+	for wnd, window := range windows {
+		if len(window.todo) == 0 {
+			// Nothing scheduled here, keep the window open
+			continue
+		}
+
+		scheduledWindows[wnd] = struct{}{}
+
+		select {
+		case sh.openWindows[wnd].done <- &window:
+		default:
+			log.Error("expected sh.openWindows[wnd].done to be buffered")
+		}
+	}
+
+	// Rewrite sh.openWindows array, removing scheduled windows
+	newOpenWindows := make([]*schedWindowRequest, 0, len(sh.openWindows)-len(scheduledWindows))
+	for wnd, window := range sh.openWindows {
+		if _, scheduled := scheduledWindows[wnd]; !scheduled {
+			// keep unscheduled windows open
+			continue
+		}
+
+		newOpenWindows = append(newOpenWindows, window)
+	}
+
+	sh.openWindows = newOpenWindows
+}
+
+func (sh *scheduler) runWorker(wid WorkerID) {
+	w := sh.workers[wid]
+
+	go func() {
+		for {
+		}
+	}()
+}
 
 func (a *activeResources) withResources(id WorkerID, wr storiface.WorkerResources, r Resources, locker sync.Locker, cb func() error) error {
-	for !canHandleRequest(r, id, wr, a) {
+	for !a.canHandleRequest(r, id, wr) {
 		if a.cond == nil {
 			a.cond = sync.NewCond(locker)
 		}
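
The comment at the top of trySched describes a two-pass assignment: pass 1 records, for each queued task, which open windows could run it; pass 2 walks the queue again and greedily places each task into the first acceptable window that still has room. Below is a small, self-contained toy (invented types, a single scalar standing in for the resource table, no selectors, shuffling, or heap removal) that illustrates only that two-pass shape, not the commit's actual code:

// Toy illustration of the two-pass, window-based assignment.
package main

import "fmt"

type task struct {
	name string
	need int // simplified "resource" requirement
}

type window struct {
	capacity int
	todo     []task
}

func main() {
	queue := []task{{"precommit1", 2}, {"commit2", 3}, {"fetch", 1}}
	windows := []window{{capacity: 3}, {capacity: 3}}

	// Pass 1: acceptable[i] lists the windows whose capacity could fit task i at all.
	acceptable := make([][]int, len(queue))
	for i, t := range queue {
		for w := range windows {
			if t.need <= windows[w].capacity {
				acceptable[i] = append(acceptable[i], w)
			}
		}
	}

	// Pass 2: greedily place each task into the first acceptable window that still
	// has enough remaining capacity, updating that window's allocation as we go.
	for i, t := range queue {
		for _, w := range acceptable[i] {
			if t.need > windows[w].capacity {
				continue
			}
			windows[w].capacity -= t.need
			windows[w].todo = append(windows[w].todo, t)
			break
		}
	}

	for w, win := range windows {
		fmt.Printf("window %d gets %v\n", w, win.todo)
	}
}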
@@ -396,16 +404,16 @@ func (a *activeResources) free(wr storiface.WorkerResources, r Resources) {
 	a.memUsedMax -= r.MaxMemory
 }
 
-func canHandleRequest(needRes Resources, wid WorkerID, res storiface.WorkerResources, active *activeResources) bool {
+func (a *activeResources) canHandleRequest(needRes Resources, wid WorkerID, res storiface.WorkerResources) bool {
 
 	// TODO: dedupe needRes.BaseMinMemory per task type (don't add if that task is already running)
-	minNeedMem := res.MemReserved + active.memUsedMin + needRes.MinMemory + needRes.BaseMinMemory
+	minNeedMem := res.MemReserved + a.memUsedMin + needRes.MinMemory + needRes.BaseMinMemory
 	if minNeedMem > res.MemPhysical {
 		log.Debugf("sched: not scheduling on worker %d; not enough physical memory - need: %dM, have %dM", wid, minNeedMem/mib, res.MemPhysical/mib)
 		return false
 	}
 
-	maxNeedMem := res.MemReserved + active.memUsedMax + needRes.MaxMemory + needRes.BaseMinMemory
+	maxNeedMem := res.MemReserved + a.memUsedMax + needRes.MaxMemory + needRes.BaseMinMemory
 
 	if maxNeedMem > res.MemSwap+res.MemPhysical {
 		log.Debugf("sched: not scheduling on worker %d; not enough virtual memory - need: %dM, have %dM", wid, maxNeedMem/mib, (res.MemSwap+res.MemPhysical)/mib)
@@ -413,19 +421,19 @@ func canHandleRequest(needRes Resources, wid WorkerID, res storiface.WorkerResou
 	}
 
 	if needRes.MultiThread() {
-		if active.cpuUse > 0 {
-			log.Debugf("sched: not scheduling on worker %d; multicore process needs %d threads, %d in use, target %d", wid, res.CPUs, active.cpuUse, res.CPUs)
+		if a.cpuUse > 0 {
+			log.Debugf("sched: not scheduling on worker %d; multicore process needs %d threads, %d in use, target %d", wid, res.CPUs, a.cpuUse, res.CPUs)
 			return false
 		}
 	} else {
-		if active.cpuUse+uint64(needRes.Threads) > res.CPUs {
-			log.Debugf("sched: not scheduling on worker %d; not enough threads, need %d, %d in use, target %d", wid, needRes.Threads, active.cpuUse, res.CPUs)
+		if a.cpuUse+uint64(needRes.Threads) > res.CPUs {
+			log.Debugf("sched: not scheduling on worker %d; not enough threads, need %d, %d in use, target %d", wid, needRes.Threads, a.cpuUse, res.CPUs)
 			return false
 		}
 	}
 
 	if len(res.GPUs) > 0 && needRes.CanGPU {
-		if active.gpuUsed {
+		if a.gpuUsed {
 			log.Debugf("sched: not scheduling on worker %d; GPU in use", wid)
 			return false
 		}
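
canHandleRequest, now a method on activeResources, gates each placement on memory first: the worker's reserved memory, plus memory already pinned by active tasks, plus the candidate task's minimum and baseline needs must fit in physical RAM, and the analogous maximum-memory sum must fit in RAM plus swap, before the thread and GPU checks run. A worked example with invented numbers (not taken from the resource table) showing the physical-memory check rejecting a task:

// Invented figures, for illustration only: 64 GiB physical RAM, 1 GiB reserved,
// 40 GiB already pinned (memUsedMin), and a task needing 16 GiB MinMemory plus
// 8 GiB BaseMinMemory. 1 + 40 + 16 + 8 = 65 GiB > 64 GiB, so the task is rejected.
package main

import "fmt"

func main() {
	const gib uint64 = 1 << 30

	memReserved := 1 * gib   // res.MemReserved
	memUsedMin := 40 * gib   // a.memUsedMin
	minMemory := 16 * gib    // needRes.MinMemory
	baseMinMemory := 8 * gib // needRes.BaseMinMemory
	memPhysical := 64 * gib  // res.MemPhysical

	minNeedMem := memReserved + memUsedMin + minMemory + baseMinMemory
	fmt.Println(minNeedMem > memPhysical) // prints: true (not enough physical memory)
}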
@@ -453,7 +461,7 @@ func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
 	return max
 }
 
-func (sh *scheduler) schedNewWorker(w *workerHandle) {
+func (sh *scheduler) newWorker(w *workerHandle) {
 	sh.workersLk.Lock()
 
 	id := sh.nextWorker
@@ -468,10 +476,10 @@ func (sh *scheduler) schedNewWorker(w *workerHandle) {
 		return
 	}
 
-	sh.onWorkerFreed(id)
+	sh.runWorker(id)
 }
 
-func (sh *scheduler) schedDropWorker(wid WorkerID) {
+func (sh *scheduler) dropWorker(wid WorkerID) {
 	sh.workersLk.Lock()
 	defer sh.workersLk.Unlock()
 