feat: sealing: Put scheduler assign logic behind an interface

Łukasz Magiera 2022-05-18 15:47:08 +02:00
parent df98a2a089
commit 9ac19cb14b
13 changed files with 506 additions and 480 deletions
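The heart of the change, condensed from the diff below: task assignment moves out of (*Scheduler).trySched into a new AssignerUtil type behind an Assigner interface, the scheduler keeps only a thin delegating hook, and the scheduler's internal types and fields are exported along the way (scheduler → Scheduler, workerRequest → WorkerRequest, workerHandle → WorkerHandle, and so on).

type Assigner interface {
	TrySched(sh *Scheduler)
}

// AssignerUtil (new file below) carries the old utilization-based trySched body.
type AssignerUtil struct{}

func (sh *Scheduler) trySched() {
	sh.workersLk.RLock()
	defer sh.workersLk.RUnlock()

	sh.assigner.TrySched(sh)
}

func newScheduler() *Scheduler {
	return &Scheduler{
		assigner: &AssignerUtil{},
		// ... remaining fields unchanged, elided here ...
	}
}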


@ -62,7 +62,7 @@ type Manager struct {
remoteHnd *stores.FetchHandler remoteHnd *stores.FetchHandler
index stores.SectorIndex index stores.SectorIndex
sched *scheduler sched *Scheduler
windowPoStSched *poStScheduler windowPoStSched *poStScheduler
winningPoStSched *poStScheduler winningPoStSched *poStScheduler


@ -2,34 +2,34 @@ package sectorstorage
import "sort" import "sort"
type requestQueue []*workerRequest type RequestQueue []*WorkerRequest
func (q requestQueue) Len() int { return len(q) } func (q RequestQueue) Len() int { return len(q) }
func (q requestQueue) Less(i, j int) bool { func (q RequestQueue) Less(i, j int) bool {
oneMuchLess, muchLess := q[i].taskType.MuchLess(q[j].taskType) oneMuchLess, muchLess := q[i].TaskType.MuchLess(q[j].TaskType)
if oneMuchLess { if oneMuchLess {
return muchLess return muchLess
} }
if q[i].priority != q[j].priority { if q[i].Priority != q[j].Priority {
return q[i].priority > q[j].priority return q[i].Priority > q[j].Priority
} }
if q[i].taskType != q[j].taskType { if q[i].TaskType != q[j].TaskType {
return q[i].taskType.Less(q[j].taskType) return q[i].TaskType.Less(q[j].TaskType)
} }
return q[i].sector.ID.Number < q[j].sector.ID.Number // optimize minerActor.NewSectors bitfield return q[i].Sector.ID.Number < q[j].Sector.ID.Number // optimize minerActor.NewSectors bitfield
} }
func (q requestQueue) Swap(i, j int) { func (q RequestQueue) Swap(i, j int) {
q[i], q[j] = q[j], q[i] q[i], q[j] = q[j], q[i]
q[i].index = i q[i].index = i
q[j].index = j q[j].index = j
} }
func (q *requestQueue) Push(x *workerRequest) { func (q *RequestQueue) Push(x *WorkerRequest) {
n := len(*q) n := len(*q)
item := x item := x
item.index = n item.index = n
@ -37,7 +37,7 @@ func (q *requestQueue) Push(x *workerRequest) {
sort.Sort(q) sort.Sort(q)
} }
func (q *requestQueue) Remove(i int) *workerRequest { func (q *RequestQueue) Remove(i int) *WorkerRequest {
old := *q old := *q
n := len(old) n := len(old)
item := old[i] item := old[i]


@ -8,13 +8,13 @@ import (
) )
func TestRequestQueue(t *testing.T) { func TestRequestQueue(t *testing.T) {
rq := &requestQueue{} rq := &RequestQueue{}
rq.Push(&workerRequest{taskType: sealtasks.TTAddPiece}) rq.Push(&WorkerRequest{TaskType: sealtasks.TTAddPiece})
rq.Push(&workerRequest{taskType: sealtasks.TTPreCommit1}) rq.Push(&WorkerRequest{TaskType: sealtasks.TTPreCommit1})
rq.Push(&workerRequest{taskType: sealtasks.TTPreCommit2}) rq.Push(&WorkerRequest{TaskType: sealtasks.TTPreCommit2})
rq.Push(&workerRequest{taskType: sealtasks.TTPreCommit1}) rq.Push(&WorkerRequest{TaskType: sealtasks.TTPreCommit1})
rq.Push(&workerRequest{taskType: sealtasks.TTAddPiece}) rq.Push(&WorkerRequest{TaskType: sealtasks.TTAddPiece})
dump := func(s string) { dump := func(s string) {
fmt.Println("---") fmt.Println("---")
@ -22,7 +22,7 @@ func TestRequestQueue(t *testing.T) {
for sqi := 0; sqi < rq.Len(); sqi++ { for sqi := 0; sqi < rq.Len(); sqi++ {
task := (*rq)[sqi] task := (*rq)[sqi]
fmt.Println(sqi, task.taskType) fmt.Println(sqi, task.TaskType)
} }
} }
@ -32,31 +32,31 @@ func TestRequestQueue(t *testing.T) {
dump("pop 1") dump("pop 1")
if pt.taskType != sealtasks.TTPreCommit2 { if pt.TaskType != sealtasks.TTPreCommit2 {
t.Error("expected precommit2, got", pt.taskType) t.Error("expected precommit2, got", pt.TaskType)
} }
pt = rq.Remove(0) pt = rq.Remove(0)
dump("pop 2") dump("pop 2")
if pt.taskType != sealtasks.TTPreCommit1 { if pt.TaskType != sealtasks.TTPreCommit1 {
t.Error("expected precommit1, got", pt.taskType) t.Error("expected precommit1, got", pt.TaskType)
} }
pt = rq.Remove(1) pt = rq.Remove(1)
dump("pop 3") dump("pop 3")
if pt.taskType != sealtasks.TTAddPiece { if pt.TaskType != sealtasks.TTAddPiece {
t.Error("expected addpiece, got", pt.taskType) t.Error("expected addpiece, got", pt.TaskType)
} }
pt = rq.Remove(0) pt = rq.Remove(0)
dump("pop 4") dump("pop 4")
if pt.taskType != sealtasks.TTPreCommit1 { if pt.TaskType != sealtasks.TTPreCommit1 {
t.Error("expected precommit1, got", pt.taskType) t.Error("expected precommit1, got", pt.TaskType)
} }
} }


@ -2,9 +2,6 @@ package sectorstorage
import ( import (
"context" "context"
"math"
"math/rand"
"sort"
"sync" "sync"
"time" "time"
@ -47,23 +44,26 @@ const mib = 1 << 20
type WorkerAction func(ctx context.Context, w Worker) error type WorkerAction func(ctx context.Context, w Worker) error
type WorkerSelector interface { type WorkerSelector interface {
Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *workerHandle) (bool, error) // true if worker is acceptable for performing a task Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *WorkerHandle) (bool, error) // true if worker is acceptable for performing a task
Cmp(ctx context.Context, task sealtasks.TaskType, a, b *workerHandle) (bool, error) // true if a is preferred over b Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) // true if a is preferred over b
} }
type scheduler struct { type Scheduler struct {
workersLk sync.RWMutex assigner Assigner
workers map[storiface.WorkerID]*workerHandle
schedule chan *workerRequest workersLk sync.RWMutex
windowRequests chan *schedWindowRequest
Workers map[storiface.WorkerID]*WorkerHandle
schedule chan *WorkerRequest
windowRequests chan *SchedWindowRequest
workerChange chan struct{} // worker added / changed/freed resources workerChange chan struct{} // worker added / changed/freed resources
workerDisable chan workerDisableReq workerDisable chan workerDisableReq
// owned by the sh.runSched goroutine // owned by the sh.runSched goroutine
schedQueue *requestQueue SchedQueue *RequestQueue
openWindows []*schedWindowRequest OpenWindows []*SchedWindowRequest
workTracker *workTracker workTracker *workTracker
@ -74,24 +74,24 @@ type scheduler struct {
testSync chan struct{} // used for testing testSync chan struct{} // used for testing
} }
type workerHandle struct { type WorkerHandle struct {
workerRpc Worker workerRpc Worker
tasksCache map[sealtasks.TaskType]struct{} tasksCache map[sealtasks.TaskType]struct{}
tasksUpdate time.Time tasksUpdate time.Time
tasksLk sync.Mutex tasksLk sync.Mutex
info storiface.WorkerInfo Info storiface.WorkerInfo
preparing *activeResources // use with workerHandle.lk preparing *activeResources // use with WorkerHandle.lk
active *activeResources // use with workerHandle.lk active *activeResources // use with WorkerHandle.lk
lk sync.Mutex // can be taken inside sched.workersLk.RLock lk sync.Mutex // can be taken inside sched.workersLk.RLock
wndLk sync.Mutex // can be taken inside sched.workersLk.RLock wndLk sync.Mutex // can be taken inside sched.workersLk.RLock
activeWindows []*schedWindow activeWindows []*SchedWindow
enabled bool Enabled bool
// for sync manager goroutine closing // for sync manager goroutine closing
cleanupStarted bool cleanupStarted bool
@ -99,19 +99,19 @@ type workerHandle struct {
closingMgr chan struct{} closingMgr chan struct{}
} }
type schedWindowRequest struct { type SchedWindowRequest struct {
worker storiface.WorkerID Worker storiface.WorkerID
done chan *schedWindow Done chan *SchedWindow
} }
type schedWindow struct { type SchedWindow struct {
allocated activeResources Allocated activeResources
todo []*workerRequest Todo []*WorkerRequest
} }
type workerDisableReq struct { type workerDisableReq struct {
activeWindows []*schedWindow activeWindows []*SchedWindow
wid storiface.WorkerID wid storiface.WorkerID
done func() done func()
} }
@ -126,11 +126,11 @@ type activeResources struct {
waiting int waiting int
} }
type workerRequest struct { type WorkerRequest struct {
sector storage.SectorRef Sector storage.SectorRef
taskType sealtasks.TaskType TaskType sealtasks.TaskType
priority int // larger values more important Priority int // larger values more important
sel WorkerSelector Sel WorkerSelector
prepare WorkerAction prepare WorkerAction
work WorkerAction work WorkerAction
@ -139,25 +139,27 @@ type workerRequest struct {
index int // The index of the item in the heap. index int // The index of the item in the heap.
indexHeap int IndexHeap int
ret chan<- workerResponse ret chan<- workerResponse
ctx context.Context Ctx context.Context
} }
type workerResponse struct { type workerResponse struct {
err error err error
} }
func newScheduler() *scheduler { func newScheduler() *Scheduler {
return &scheduler{ return &Scheduler{
workers: map[storiface.WorkerID]*workerHandle{}, assigner: &AssignerUtil{},
schedule: make(chan *workerRequest), Workers: map[storiface.WorkerID]*WorkerHandle{},
windowRequests: make(chan *schedWindowRequest, 20),
schedule: make(chan *WorkerRequest),
windowRequests: make(chan *SchedWindowRequest, 20),
workerChange: make(chan struct{}, 20), workerChange: make(chan struct{}, 20),
workerDisable: make(chan workerDisableReq), workerDisable: make(chan workerDisableReq),
schedQueue: &requestQueue{}, SchedQueue: &RequestQueue{},
workTracker: &workTracker{ workTracker: &workTracker{
done: map[storiface.CallID]struct{}{}, done: map[storiface.CallID]struct{}{},
@ -172,15 +174,15 @@ func newScheduler() *scheduler {
} }
} }
func (sh *scheduler) Schedule(ctx context.Context, sector storage.SectorRef, taskType sealtasks.TaskType, sel WorkerSelector, prepare WorkerAction, work WorkerAction) error { func (sh *Scheduler) Schedule(ctx context.Context, sector storage.SectorRef, taskType sealtasks.TaskType, sel WorkerSelector, prepare WorkerAction, work WorkerAction) error {
ret := make(chan workerResponse) ret := make(chan workerResponse)
select { select {
case sh.schedule <- &workerRequest{ case sh.schedule <- &WorkerRequest{
sector: sector, Sector: sector,
taskType: taskType, TaskType: taskType,
priority: getPriority(ctx), Priority: getPriority(ctx),
sel: sel, Sel: sel,
prepare: prepare, prepare: prepare,
work: work, work: work,
@ -188,7 +190,7 @@ func (sh *scheduler) Schedule(ctx context.Context, sector storage.SectorRef, tas
start: time.Now(), start: time.Now(),
ret: ret, ret: ret,
ctx: ctx, Ctx: ctx,
}: }:
case <-sh.closing: case <-sh.closing:
return xerrors.New("closing") return xerrors.New("closing")
@ -206,10 +208,10 @@ func (sh *scheduler) Schedule(ctx context.Context, sector storage.SectorRef, tas
} }
} }
func (r *workerRequest) respond(err error) { func (r *WorkerRequest) respond(err error) {
select { select {
case r.ret <- workerResponse{err: err}: case r.ret <- workerResponse{err: err}:
case <-r.ctx.Done(): case <-r.Ctx.Done():
log.Warnf("request got cancelled before we could respond") log.Warnf("request got cancelled before we could respond")
} }
} }
@ -225,7 +227,7 @@ type SchedDiagInfo struct {
OpenWindows []string OpenWindows []string
} }
func (sh *scheduler) runSched() { func (sh *Scheduler) runSched() {
defer close(sh.closed) defer close(sh.closed)
iw := time.After(InitWait) iw := time.After(InitWait)
@ -242,14 +244,14 @@ func (sh *scheduler) runSched() {
toDisable = append(toDisable, dreq) toDisable = append(toDisable, dreq)
doSched = true doSched = true
case req := <-sh.schedule: case req := <-sh.schedule:
sh.schedQueue.Push(req) sh.SchedQueue.Push(req)
doSched = true doSched = true
if sh.testSync != nil { if sh.testSync != nil {
sh.testSync <- struct{}{} sh.testSync <- struct{}{}
} }
case req := <-sh.windowRequests: case req := <-sh.windowRequests:
sh.openWindows = append(sh.openWindows, req) sh.OpenWindows = append(sh.OpenWindows, req)
doSched = true doSched = true
case ireq := <-sh.info: case ireq := <-sh.info:
ireq(sh.diag()) ireq(sh.diag())
@ -273,12 +275,12 @@ func (sh *scheduler) runSched() {
case dreq := <-sh.workerDisable: case dreq := <-sh.workerDisable:
toDisable = append(toDisable, dreq) toDisable = append(toDisable, dreq)
case req := <-sh.schedule: case req := <-sh.schedule:
sh.schedQueue.Push(req) sh.SchedQueue.Push(req)
if sh.testSync != nil { if sh.testSync != nil {
sh.testSync <- struct{}{} sh.testSync <- struct{}{}
} }
case req := <-sh.windowRequests: case req := <-sh.windowRequests:
sh.openWindows = append(sh.openWindows, req) sh.OpenWindows = append(sh.OpenWindows, req)
default: default:
break loop break loop
} }
@ -286,21 +288,21 @@ func (sh *scheduler) runSched() {
for _, req := range toDisable { for _, req := range toDisable {
for _, window := range req.activeWindows { for _, window := range req.activeWindows {
for _, request := range window.todo { for _, request := range window.Todo {
sh.schedQueue.Push(request) sh.SchedQueue.Push(request)
} }
} }
openWindows := make([]*schedWindowRequest, 0, len(sh.openWindows)) openWindows := make([]*SchedWindowRequest, 0, len(sh.OpenWindows))
for _, window := range sh.openWindows { for _, window := range sh.OpenWindows {
if window.worker != req.wid { if window.Worker != req.wid {
openWindows = append(openWindows, window) openWindows = append(openWindows, window)
} }
} }
sh.openWindows = openWindows sh.OpenWindows = openWindows
sh.workersLk.Lock() sh.workersLk.Lock()
sh.workers[req.wid].enabled = false sh.Workers[req.wid].Enabled = false
sh.workersLk.Unlock() sh.workersLk.Unlock()
req.done() req.done()
@ -312,281 +314,51 @@ func (sh *scheduler) runSched() {
} }
} }
func (sh *scheduler) diag() SchedDiagInfo { func (sh *Scheduler) diag() SchedDiagInfo {
var out SchedDiagInfo var out SchedDiagInfo
for sqi := 0; sqi < sh.schedQueue.Len(); sqi++ { for sqi := 0; sqi < sh.SchedQueue.Len(); sqi++ {
task := (*sh.schedQueue)[sqi] task := (*sh.SchedQueue)[sqi]
out.Requests = append(out.Requests, SchedDiagRequestInfo{ out.Requests = append(out.Requests, SchedDiagRequestInfo{
Sector: task.sector.ID, Sector: task.Sector.ID,
TaskType: task.taskType, TaskType: task.TaskType,
Priority: task.priority, Priority: task.Priority,
}) })
} }
sh.workersLk.RLock() sh.workersLk.RLock()
defer sh.workersLk.RUnlock() defer sh.workersLk.RUnlock()
for _, window := range sh.openWindows { for _, window := range sh.OpenWindows {
out.OpenWindows = append(out.OpenWindows, uuid.UUID(window.worker).String()) out.OpenWindows = append(out.OpenWindows, uuid.UUID(window.Worker).String())
} }
return out return out
} }
func (sh *scheduler) trySched() { type Assigner interface {
/* TrySched(sh *Scheduler)
This assigns tasks to workers based on: }
- Task priority (achieved by handling sh.schedQueue in order, since it's already sorted by priority)
- Worker resource availability
- Task-specified worker preference (acceptableWindows array below sorted by this preference)
- Window request age
1. For each task in the schedQueue find windows which can handle them
1.1. Create list of windows capable of handling a task
1.2. Sort windows according to task selector preferences
2. Going through schedQueue again, assign task to first acceptable window
with resources available
3. Submit windows with scheduled tasks to workers
*/
func (sh *Scheduler) trySched() {
sh.workersLk.RLock() sh.workersLk.RLock()
defer sh.workersLk.RUnlock() defer sh.workersLk.RUnlock()
windowsLen := len(sh.openWindows) sh.assigner.TrySched(sh)
queueLen := sh.schedQueue.Len()
log.Debugf("SCHED %d queued; %d open windows", queueLen, windowsLen)
if windowsLen == 0 || queueLen == 0 {
// nothing to schedule on
return
} }
windows := make([]schedWindow, windowsLen) func (sh *Scheduler) schedClose() {
acceptableWindows := make([][]int, queueLen) // QueueIndex -> []OpenWindowIndex
// Step 1
throttle := make(chan struct{}, windowsLen)
var wg sync.WaitGroup
wg.Add(queueLen)
for i := 0; i < queueLen; i++ {
throttle <- struct{}{}
go func(sqi int) {
defer wg.Done()
defer func() {
<-throttle
}()
task := (*sh.schedQueue)[sqi]
task.indexHeap = sqi
for wnd, windowRequest := range sh.openWindows {
worker, ok := sh.workers[windowRequest.worker]
if !ok {
log.Errorf("worker referenced by windowRequest not found (worker: %s)", windowRequest.worker)
// TODO: How to move forward here?
continue
}
if !worker.enabled {
log.Debugw("skipping disabled worker", "worker", windowRequest.worker)
continue
}
needRes := worker.info.Resources.ResourceSpec(task.sector.ProofType, task.taskType)
// TODO: allow bigger windows
if !windows[wnd].allocated.canHandleRequest(needRes, windowRequest.worker, "schedAcceptable", worker.info) {
continue
}
rpcCtx, cancel := context.WithTimeout(task.ctx, SelectorTimeout)
ok, err := task.sel.Ok(rpcCtx, task.taskType, task.sector.ProofType, worker)
cancel()
if err != nil {
log.Errorf("trySched(1) req.sel.Ok error: %+v", err)
continue
}
if !ok {
continue
}
acceptableWindows[sqi] = append(acceptableWindows[sqi], wnd)
}
if len(acceptableWindows[sqi]) == 0 {
return
}
// Pick best worker (shuffle in case some workers are equally as good)
rand.Shuffle(len(acceptableWindows[sqi]), func(i, j int) {
acceptableWindows[sqi][i], acceptableWindows[sqi][j] = acceptableWindows[sqi][j], acceptableWindows[sqi][i] // nolint:scopelint
})
sort.SliceStable(acceptableWindows[sqi], func(i, j int) bool {
wii := sh.openWindows[acceptableWindows[sqi][i]].worker // nolint:scopelint
wji := sh.openWindows[acceptableWindows[sqi][j]].worker // nolint:scopelint
if wii == wji {
// for the same worker prefer older windows
return acceptableWindows[sqi][i] < acceptableWindows[sqi][j] // nolint:scopelint
}
wi := sh.workers[wii]
wj := sh.workers[wji]
rpcCtx, cancel := context.WithTimeout(task.ctx, SelectorTimeout)
defer cancel()
r, err := task.sel.Cmp(rpcCtx, task.taskType, wi, wj)
if err != nil {
log.Errorf("selecting best worker: %s", err)
}
return r
})
}(i)
}
wg.Wait()
log.Debugf("SCHED windows: %+v", windows)
log.Debugf("SCHED Acceptable win: %+v", acceptableWindows)
// Step 2
scheduled := 0
rmQueue := make([]int, 0, queueLen)
workerUtil := map[storiface.WorkerID]float64{}
for sqi := 0; sqi < queueLen; sqi++ {
task := (*sh.schedQueue)[sqi]
selectedWindow := -1
var needRes storiface.Resources
var info storiface.WorkerInfo
var bestWid storiface.WorkerID
bestUtilization := math.MaxFloat64 // smaller = better
for i, wnd := range acceptableWindows[task.indexHeap] {
wid := sh.openWindows[wnd].worker
w := sh.workers[wid]
res := info.Resources.ResourceSpec(task.sector.ProofType, task.taskType)
log.Debugf("SCHED try assign sqi:%d sector %d to window %d (awi:%d)", sqi, task.sector.ID.Number, wnd, i)
// TODO: allow bigger windows
if !windows[wnd].allocated.canHandleRequest(needRes, wid, "schedAssign", info) {
continue
}
wu, found := workerUtil[wid]
if !found {
wu = w.utilization()
workerUtil[wid] = wu
}
if wu >= bestUtilization {
// acceptable worker list is initially sorted by utilization, and the initially-best workers
// will be assigned tasks first. This means that if we find a worker which isn't better, it
// probably means that the other workers aren't better either.
//
// utilization
// ^
// | /
// | \ /
// | \ /
// | *
// #--------> acceptableWindow index
//
// * -> we're here
break
}
info = w.info
needRes = res
bestWid = wid
selectedWindow = wnd
bestUtilization = wu
}
if selectedWindow < 0 {
// all windows full
continue
}
log.Debugw("SCHED ASSIGNED",
"sqi", sqi,
"sector", task.sector.ID.Number,
"task", task.taskType,
"window", selectedWindow,
"worker", bestWid,
"utilization", bestUtilization)
workerUtil[bestWid] += windows[selectedWindow].allocated.add(info.Resources, needRes)
windows[selectedWindow].todo = append(windows[selectedWindow].todo, task)
rmQueue = append(rmQueue, sqi)
scheduled++
}
if len(rmQueue) > 0 {
for i := len(rmQueue) - 1; i >= 0; i-- {
sh.schedQueue.Remove(rmQueue[i])
}
}
// Step 3
if scheduled == 0 {
return
}
scheduledWindows := map[int]struct{}{}
for wnd, window := range windows {
if len(window.todo) == 0 {
// Nothing scheduled here, keep the window open
continue
}
scheduledWindows[wnd] = struct{}{}
window := window // copy
select {
case sh.openWindows[wnd].done <- &window:
default:
log.Error("expected sh.openWindows[wnd].done to be buffered")
}
}
// Rewrite sh.openWindows array, removing scheduled windows
newOpenWindows := make([]*schedWindowRequest, 0, windowsLen-len(scheduledWindows))
for wnd, window := range sh.openWindows {
if _, scheduled := scheduledWindows[wnd]; scheduled {
// keep unscheduled windows open
continue
}
newOpenWindows = append(newOpenWindows, window)
}
sh.openWindows = newOpenWindows
}
func (sh *scheduler) schedClose() {
sh.workersLk.Lock() sh.workersLk.Lock()
defer sh.workersLk.Unlock() defer sh.workersLk.Unlock()
log.Debugf("closing scheduler") log.Debugf("closing scheduler")
for i, w := range sh.workers { for i, w := range sh.Workers {
sh.workerCleanup(i, w) sh.workerCleanup(i, w)
} }
} }
func (sh *scheduler) Info(ctx context.Context) (interface{}, error) { func (sh *Scheduler) Info(ctx context.Context) (interface{}, error) {
ch := make(chan interface{}, 1) ch := make(chan interface{}, 1)
sh.info <- func(res interface{}) { sh.info <- func(res interface{}) {
@ -601,7 +373,7 @@ func (sh *scheduler) Info(ctx context.Context) (interface{}, error) {
} }
} }
func (sh *scheduler) Close(ctx context.Context) error { func (sh *Scheduler) Close(ctx context.Context) error {
close(sh.closing) close(sh.closing)
select { select {
case <-sh.closed: case <-sh.closed:


@ -0,0 +1,254 @@
package sectorstorage
import (
"context"
"math"
"math/rand"
"sort"
"sync"
"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
)
// AssignerUtil is a task assigner assigning tasks to workers with lowest utilization
type AssignerUtil struct{}
var _ Assigner = &AssignerUtil{}
func (a *AssignerUtil) TrySched(sh *Scheduler) {
/*
This assigns tasks to workers based on:
- Task priority (achieved by handling sh.SchedQueue in order, since it's already sorted by priority)
- Worker resource availability
- Task-specified worker preference (acceptableWindows array below sorted by this preference)
- Window request age
1. For each task in the SchedQueue find windows which can handle them
1.1. Create list of windows capable of handling a task
1.2. Sort windows according to task selector preferences
2. Going through SchedQueue again, assign task to first acceptable window
with resources available
3. Submit windows with scheduled tasks to workers
*/
windowsLen := len(sh.OpenWindows)
queueLen := sh.SchedQueue.Len()
log.Debugf("SCHED %d queued; %d open windows", queueLen, windowsLen)
if windowsLen == 0 || queueLen == 0 {
// nothing to schedule on
return
}
windows := make([]SchedWindow, windowsLen)
acceptableWindows := make([][]int, queueLen) // QueueIndex -> []OpenWindowIndex
// Step 1
throttle := make(chan struct{}, windowsLen)
var wg sync.WaitGroup
wg.Add(queueLen)
for i := 0; i < queueLen; i++ {
throttle <- struct{}{}
go func(sqi int) {
defer wg.Done()
defer func() {
<-throttle
}()
task := (*sh.SchedQueue)[sqi]
task.IndexHeap = sqi
for wnd, windowRequest := range sh.OpenWindows {
worker, ok := sh.Workers[windowRequest.Worker]
if !ok {
log.Errorf("worker referenced by windowRequest not found (worker: %s)", windowRequest.Worker)
// TODO: How to move forward here?
continue
}
if !worker.Enabled {
log.Debugw("skipping disabled worker", "worker", windowRequest.Worker)
continue
}
needRes := worker.Info.Resources.ResourceSpec(task.Sector.ProofType, task.TaskType)
// TODO: allow bigger windows
if !windows[wnd].Allocated.CanHandleRequest(needRes, windowRequest.Worker, "schedAcceptable", worker.Info) {
continue
}
rpcCtx, cancel := context.WithTimeout(task.Ctx, SelectorTimeout)
ok, err := task.Sel.Ok(rpcCtx, task.TaskType, task.Sector.ProofType, worker)
cancel()
if err != nil {
log.Errorf("trySched(1) req.Sel.Ok error: %+v", err)
continue
}
if !ok {
continue
}
acceptableWindows[sqi] = append(acceptableWindows[sqi], wnd)
}
if len(acceptableWindows[sqi]) == 0 {
return
}
// Pick best worker (shuffle in case some workers are equally as good)
rand.Shuffle(len(acceptableWindows[sqi]), func(i, j int) {
acceptableWindows[sqi][i], acceptableWindows[sqi][j] = acceptableWindows[sqi][j], acceptableWindows[sqi][i] // nolint:scopelint
})
sort.SliceStable(acceptableWindows[sqi], func(i, j int) bool {
wii := sh.OpenWindows[acceptableWindows[sqi][i]].Worker // nolint:scopelint
wji := sh.OpenWindows[acceptableWindows[sqi][j]].Worker // nolint:scopelint
if wii == wji {
// for the same worker prefer older windows
return acceptableWindows[sqi][i] < acceptableWindows[sqi][j] // nolint:scopelint
}
wi := sh.Workers[wii]
wj := sh.Workers[wji]
rpcCtx, cancel := context.WithTimeout(task.Ctx, SelectorTimeout)
defer cancel()
r, err := task.Sel.Cmp(rpcCtx, task.TaskType, wi, wj)
if err != nil {
log.Errorf("selecting best worker: %s", err)
}
return r
})
}(i)
}
wg.Wait()
log.Debugf("SCHED windows: %+v", windows)
log.Debugf("SCHED Acceptable win: %+v", acceptableWindows)
// Step 2
scheduled := 0
rmQueue := make([]int, 0, queueLen)
workerUtil := map[storiface.WorkerID]float64{}
for sqi := 0; sqi < queueLen; sqi++ {
task := (*sh.SchedQueue)[sqi]
selectedWindow := -1
var needRes storiface.Resources
var info storiface.WorkerInfo
var bestWid storiface.WorkerID
bestUtilization := math.MaxFloat64 // smaller = better
for i, wnd := range acceptableWindows[task.IndexHeap] {
wid := sh.OpenWindows[wnd].Worker
w := sh.Workers[wid]
res := info.Resources.ResourceSpec(task.Sector.ProofType, task.TaskType)
log.Debugf("SCHED try assign sqi:%d sector %d to window %d (awi:%d)", sqi, task.Sector.ID.Number, wnd, i)
// TODO: allow bigger windows
if !windows[wnd].Allocated.CanHandleRequest(needRes, wid, "schedAssign", info) {
continue
}
wu, found := workerUtil[wid]
if !found {
wu = w.Utilization()
workerUtil[wid] = wu
}
if wu >= bestUtilization {
// acceptable worker list is initially sorted by utilization, and the initially-best workers
// will be assigned tasks first. This means that if we find a worker which isn't better, it
// probably means that the other workers aren't better either.
//
// utilization
// ^
// | /
// | \ /
// | \ /
// | *
// #--------> acceptableWindow index
//
// * -> we're here
break
}
info = w.Info
needRes = res
bestWid = wid
selectedWindow = wnd
bestUtilization = wu
}
if selectedWindow < 0 {
// all windows full
continue
}
log.Debugw("SCHED ASSIGNED",
"sqi", sqi,
"sector", task.Sector.ID.Number,
"task", task.TaskType,
"window", selectedWindow,
"worker", bestWid,
"utilization", bestUtilization)
workerUtil[bestWid] += windows[selectedWindow].Allocated.Add(info.Resources, needRes)
windows[selectedWindow].Todo = append(windows[selectedWindow].Todo, task)
rmQueue = append(rmQueue, sqi)
scheduled++
}
if len(rmQueue) > 0 {
for i := len(rmQueue) - 1; i >= 0; i-- {
sh.SchedQueue.Remove(rmQueue[i])
}
}
// Step 3
if scheduled == 0 {
return
}
scheduledWindows := map[int]struct{}{}
for wnd, window := range windows {
if len(window.Todo) == 0 {
// Nothing scheduled here, keep the window open
continue
}
scheduledWindows[wnd] = struct{}{}
window := window // copy
select {
case sh.OpenWindows[wnd].Done <- &window:
default:
log.Error("expected sh.OpenWindows[wnd].Done to be buffered")
}
}
// Rewrite sh.OpenWindows array, removing scheduled windows
newOpenWindows := make([]*SchedWindowRequest, 0, windowsLen-len(scheduledWindows))
for wnd, window := range sh.OpenWindows {
if _, scheduled := scheduledWindows[wnd]; scheduled {
// keep unscheduled windows open
continue
}
newOpenWindows = append(newOpenWindows, window)
}
sh.OpenWindows = newOpenWindows
}
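
Because assignment now sits behind the Assigner interface, an alternative strategy can be dropped in without touching the scheduler loop or the worker code. A minimal hypothetical sketch follows (AssignerFIFO is not part of this commit): it hands each queued request to the first open window that passes the selector and resource checks, deliberately omitting AssignerUtil's parallel window filtering, utilization ranking, and selector timeouts.

// AssignerFIFO is a hypothetical alternative Assigner: first acceptable window wins.
type AssignerFIFO struct{}

var _ Assigner = &AssignerFIFO{}

func (a *AssignerFIFO) TrySched(sh *Scheduler) {
	windows := make([]SchedWindow, len(sh.OpenWindows))
	rmQueue := make([]int, 0, sh.SchedQueue.Len())

	for sqi := 0; sqi < sh.SchedQueue.Len(); sqi++ {
		task := (*sh.SchedQueue)[sqi]

		for wnd, req := range sh.OpenWindows {
			worker, ok := sh.Workers[req.Worker]
			if !ok || !worker.Enabled {
				continue
			}

			needRes := worker.Info.Resources.ResourceSpec(task.Sector.ProofType, task.TaskType)
			if !windows[wnd].Allocated.CanHandleRequest(needRes, req.Worker, "fifoAssign", worker.Info) {
				continue
			}

			if ok, err := task.Sel.Ok(task.Ctx, task.TaskType, task.Sector.ProofType, worker); err != nil || !ok {
				continue
			}

			windows[wnd].Allocated.Add(worker.Info.Resources, needRes)
			windows[wnd].Todo = append(windows[wnd].Todo, task)
			rmQueue = append(rmQueue, sqi)
			break
		}
	}

	for i := len(rmQueue) - 1; i >= 0; i-- {
		sh.SchedQueue.Remove(rmQueue[i])
	}

	// Hand filled windows back to their workers, mirroring AssignerUtil's step 3;
	// a full implementation would also rewrite sh.OpenWindows afterwards.
	for wnd := range windows {
		if len(windows[wnd].Todo) == 0 {
			continue
		}
		window := windows[wnd] // copy
		select {
		case sh.OpenWindows[wnd].Done <- &window:
		default:
			log.Error("expected sh.OpenWindows[wnd].Done to be buffered")
		}
	}
}

Switching strategies would then be a one-line change in newScheduler (assigner: &AssignerFIFO{} instead of &AssignerUtil{}), or could later be made configurable, which is the point of hiding the logic behind the interface.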


@ -17,7 +17,7 @@ import (
type poStScheduler struct { type poStScheduler struct {
lk sync.RWMutex lk sync.RWMutex
workers map[storiface.WorkerID]*workerHandle workers map[storiface.WorkerID]*WorkerHandle
cond *sync.Cond cond *sync.Cond
postType sealtasks.TaskType postType sealtasks.TaskType
@ -25,14 +25,14 @@ type poStScheduler struct {
func newPoStScheduler(t sealtasks.TaskType) *poStScheduler { func newPoStScheduler(t sealtasks.TaskType) *poStScheduler {
ps := &poStScheduler{ ps := &poStScheduler{
workers: map[storiface.WorkerID]*workerHandle{}, workers: map[storiface.WorkerID]*WorkerHandle{},
postType: t, postType: t,
} }
ps.cond = sync.NewCond(&ps.lk) ps.cond = sync.NewCond(&ps.lk)
return ps return ps
} }
func (ps *poStScheduler) MaybeAddWorker(wid storiface.WorkerID, tasks map[sealtasks.TaskType]struct{}, w *workerHandle) bool { func (ps *poStScheduler) MaybeAddWorker(wid storiface.WorkerID, tasks map[sealtasks.TaskType]struct{}, w *WorkerHandle) bool {
if _, ok := tasks[ps.postType]; !ok { if _, ok := tasks[ps.postType]; !ok {
return false return false
} }
@ -49,10 +49,10 @@ func (ps *poStScheduler) MaybeAddWorker(wid storiface.WorkerID, tasks map[sealta
return true return true
} }
func (ps *poStScheduler) delWorker(wid storiface.WorkerID) *workerHandle { func (ps *poStScheduler) delWorker(wid storiface.WorkerID) *WorkerHandle {
ps.lk.Lock() ps.lk.Lock()
defer ps.lk.Unlock() defer ps.lk.Unlock()
var w *workerHandle = nil var w *WorkerHandle = nil
if wh, ok := ps.workers[wid]; ok { if wh, ok := ps.workers[wid]; ok {
w = wh w = wh
delete(ps.workers, wid) delete(ps.workers, wid)
@ -68,7 +68,7 @@ func (ps *poStScheduler) CanSched(ctx context.Context) bool {
} }
for _, w := range ps.workers { for _, w := range ps.workers {
if w.enabled { if w.Enabled {
return true return true
} }
} }
@ -105,7 +105,7 @@ func (ps *poStScheduler) Schedule(ctx context.Context, primary bool, spt abi.Reg
selected := candidates[0] selected := candidates[0]
worker := ps.workers[selected.id] worker := ps.workers[selected.id]
return worker.active.withResources(selected.id, worker.info, selected.res, &ps.lk, func() error { return worker.active.withResources(selected.id, worker.Info, selected.res, &ps.lk, func() error {
ps.lk.Unlock() ps.lk.Unlock()
defer ps.lk.Lock() defer ps.lk.Lock()
@ -122,9 +122,9 @@ func (ps *poStScheduler) readyWorkers(spt abi.RegisteredSealProof) (bool, []cand
var accepts []candidateWorker var accepts []candidateWorker
//if the gpus of the worker are insufficient or it's disabled, it cannot be scheduled //if the gpus of the worker are insufficient or it's disabled, it cannot be scheduled
for wid, wr := range ps.workers { for wid, wr := range ps.workers {
needRes := wr.info.Resources.ResourceSpec(spt, ps.postType) needRes := wr.Info.Resources.ResourceSpec(spt, ps.postType)
if !wr.active.canHandleRequest(needRes, wid, "post-readyWorkers", wr.info) { if !wr.active.CanHandleRequest(needRes, wid, "post-readyWorkers", wr.Info) {
continue continue
} }
@ -145,16 +145,16 @@ func (ps *poStScheduler) readyWorkers(spt abi.RegisteredSealProof) (bool, []cand
func (ps *poStScheduler) disable(wid storiface.WorkerID) { func (ps *poStScheduler) disable(wid storiface.WorkerID) {
ps.lk.Lock() ps.lk.Lock()
defer ps.lk.Unlock() defer ps.lk.Unlock()
ps.workers[wid].enabled = false ps.workers[wid].Enabled = false
} }
func (ps *poStScheduler) enable(wid storiface.WorkerID) { func (ps *poStScheduler) enable(wid storiface.WorkerID) {
ps.lk.Lock() ps.lk.Lock()
defer ps.lk.Unlock() defer ps.lk.Unlock()
ps.workers[wid].enabled = true ps.workers[wid].Enabled = true
} }
func (ps *poStScheduler) watch(wid storiface.WorkerID, worker *workerHandle) { func (ps *poStScheduler) watch(wid storiface.WorkerID, worker *WorkerHandle) {
heartbeatTimer := time.NewTicker(stores.HeartbeatInterval) heartbeatTimer := time.NewTicker(stores.HeartbeatInterval)
defer heartbeatTimer.Stop() defer heartbeatTimer.Stop()
@ -197,7 +197,7 @@ func (ps *poStScheduler) watch(wid storiface.WorkerID, worker *workerHandle) {
} }
} }
func (ps *poStScheduler) workerCleanup(wid storiface.WorkerID, w *workerHandle) { func (ps *poStScheduler) workerCleanup(wid storiface.WorkerID, w *WorkerHandle) {
select { select {
case <-w.closingMgr: case <-w.closingMgr:
default: default:
@ -223,7 +223,7 @@ func (ps *poStScheduler) schedClose() {
} }
} }
func (ps *poStScheduler) WorkerStats(ctx context.Context, cb func(ctx context.Context, wid storiface.WorkerID, worker *workerHandle)) { func (ps *poStScheduler) WorkerStats(ctx context.Context, cb func(ctx context.Context, wid storiface.WorkerID, worker *WorkerHandle)) {
ps.lk.RLock() ps.lk.RLock()
defer ps.lk.RUnlock() defer ps.lk.RUnlock()
for id, w := range ps.workers { for id, w := range ps.workers {


@ -10,7 +10,7 @@ import (
) )
func (a *activeResources) withResources(id storiface.WorkerID, wr storiface.WorkerInfo, r storiface.Resources, locker sync.Locker, cb func() error) error { func (a *activeResources) withResources(id storiface.WorkerID, wr storiface.WorkerInfo, r storiface.Resources, locker sync.Locker, cb func() error) error {
for !a.canHandleRequest(r, id, "withResources", wr) { for !a.CanHandleRequest(r, id, "withResources", wr) {
if a.cond == nil { if a.cond == nil {
a.cond = sync.NewCond(locker) a.cond = sync.NewCond(locker)
} }
@ -19,7 +19,7 @@ func (a *activeResources) withResources(id storiface.WorkerID, wr storiface.Work
a.waiting-- a.waiting--
} }
a.add(wr.Resources, r) a.Add(wr.Resources, r)
err := cb() err := cb()
@ -34,7 +34,7 @@ func (a *activeResources) hasWorkWaiting() bool {
} }
// add task resources to activeResources and return utilization difference // add task resources to activeResources and return utilization difference
func (a *activeResources) add(wr storiface.WorkerResources, r storiface.Resources) float64 { func (a *activeResources) Add(wr storiface.WorkerResources, r storiface.Resources) float64 {
startUtil := a.utilization(wr) startUtil := a.utilization(wr)
if r.GPUUtilization > 0 { if r.GPUUtilization > 0 {
@ -60,9 +60,9 @@ func (a *activeResources) free(wr storiface.WorkerResources, r storiface.Resourc
} }
} }
// canHandleRequest evaluates if the worker has enough available resources to // CanHandleRequest evaluates if the worker has enough available resources to
// handle the request. // handle the request.
func (a *activeResources) canHandleRequest(needRes storiface.Resources, wid storiface.WorkerID, caller string, info storiface.WorkerInfo) bool { func (a *activeResources) CanHandleRequest(needRes storiface.Resources, wid storiface.WorkerID, caller string, info storiface.WorkerInfo) bool {
if info.IgnoreResources { if info.IgnoreResources {
// shortcircuit; if this worker is ignoring resources, it can always handle the request. // shortcircuit; if this worker is ignoring resources, it can always handle the request.
return true return true
@ -145,14 +145,14 @@ func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
return max return max
} }
func (wh *workerHandle) utilization() float64 { func (wh *WorkerHandle) Utilization() float64 {
wh.lk.Lock() wh.lk.Lock()
u := wh.active.utilization(wh.info.Resources) u := wh.active.utilization(wh.Info.Resources)
u += wh.preparing.utilization(wh.info.Resources) u += wh.preparing.utilization(wh.Info.Resources)
wh.lk.Unlock() wh.lk.Unlock()
wh.wndLk.Lock() wh.wndLk.Lock()
for _, window := range wh.activeWindows { for _, window := range wh.activeWindows {
u += window.allocated.utilization(wh.info.Resources) u += window.Allocated.utilization(wh.Info.Resources)
} }
wh.wndLk.Unlock() wh.wndLk.Unlock()
@ -161,7 +161,7 @@ func (wh *workerHandle) utilization() float64 {
var tasksCacheTimeout = 30 * time.Second var tasksCacheTimeout = 30 * time.Second
func (wh *workerHandle) TaskTypes(ctx context.Context) (t map[sealtasks.TaskType]struct{}, err error) { func (wh *WorkerHandle) TaskTypes(ctx context.Context) (t map[sealtasks.TaskType]struct{}, err error) {
wh.tasksLk.Lock() wh.tasksLk.Lock()
defer wh.tasksLk.Unlock() defer wh.tasksLk.Unlock()
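
These accounting helpers (CanHandleRequest, Add, free, Utilization) are always used in the same reserve / run / release discipline; startProcessingTask in sched_worker.go below is the real example. A condensed sketch of that discipline, assuming a worker handle wh, worker ID wid and task resources needRes are already in hand (runWithPreparingRes is illustrative only, not part of this change):

// Sketch only: wh (*WorkerHandle), wid (storiface.WorkerID) and needRes
// (storiface.Resources) are assumed inputs.
func runWithPreparingRes(wh *WorkerHandle, wid storiface.WorkerID, needRes storiface.Resources, run func() error) error {
	wh.lk.Lock()
	if !wh.preparing.CanHandleRequest(needRes, wid, "sketch", wh.Info) {
		wh.lk.Unlock()
		return xerrors.New("insufficient resources") // caller would retry later
	}
	wh.preparing.Add(wh.Info.Resources, needRes) // reserve
	wh.lk.Unlock()

	err := run() // do the (possibly long) work without holding the lock

	wh.lk.Lock()
	wh.preparing.free(wh.Info.Resources, needRes) // release the reservation
	wh.lk.Unlock()
	return err
}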


@ -183,7 +183,7 @@ func (s *schedTestWorker) Close() error {
var _ Worker = &schedTestWorker{} var _ Worker = &schedTestWorker{}
func addTestWorker(t *testing.T, sched *scheduler, index *stores.Index, name string, taskTypes map[sealtasks.TaskType]struct{}, resources storiface.WorkerResources, ignoreResources bool) { func addTestWorker(t *testing.T, sched *Scheduler, index *stores.Index, name string, taskTypes map[sealtasks.TaskType]struct{}, resources storiface.WorkerResources, ignoreResources bool) {
w := &schedTestWorker{ w := &schedTestWorker{
name: name, name: name,
taskTypes: taskTypes, taskTypes: taskTypes,
@ -259,13 +259,13 @@ func TestSched(t *testing.T) {
wg sync.WaitGroup wg sync.WaitGroup
} }
type task func(*testing.T, *scheduler, *stores.Index, *runMeta) type task func(*testing.T, *Scheduler, *stores.Index, *runMeta)
sched := func(taskName, expectWorker string, sid abi.SectorNumber, taskType sealtasks.TaskType) task { sched := func(taskName, expectWorker string, sid abi.SectorNumber, taskType sealtasks.TaskType) task {
_, _, l, _ := runtime.Caller(1) _, _, l, _ := runtime.Caller(1)
_, _, l2, _ := runtime.Caller(2) _, _, l2, _ := runtime.Caller(2)
return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { return func(t *testing.T, sched *Scheduler, index *stores.Index, rm *runMeta) {
done := make(chan struct{}) done := make(chan struct{})
rm.done[taskName] = done rm.done[taskName] = done
@ -314,7 +314,7 @@ func TestSched(t *testing.T) {
taskStarted := func(name string) task { taskStarted := func(name string) task {
_, _, l, _ := runtime.Caller(1) _, _, l, _ := runtime.Caller(1)
_, _, l2, _ := runtime.Caller(2) _, _, l2, _ := runtime.Caller(2)
return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { return func(t *testing.T, sched *Scheduler, index *stores.Index, rm *runMeta) {
select { select {
case rm.done[name] <- struct{}{}: case rm.done[name] <- struct{}{}:
case <-ctx.Done(): case <-ctx.Done():
@ -326,7 +326,7 @@ func TestSched(t *testing.T) {
taskDone := func(name string) task { taskDone := func(name string) task {
_, _, l, _ := runtime.Caller(1) _, _, l, _ := runtime.Caller(1)
_, _, l2, _ := runtime.Caller(2) _, _, l2, _ := runtime.Caller(2)
return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { return func(t *testing.T, sched *Scheduler, index *stores.Index, rm *runMeta) {
select { select {
case rm.done[name] <- struct{}{}: case rm.done[name] <- struct{}{}:
case <-ctx.Done(): case <-ctx.Done():
@ -339,7 +339,7 @@ func TestSched(t *testing.T) {
taskNotScheduled := func(name string) task { taskNotScheduled := func(name string) task {
_, _, l, _ := runtime.Caller(1) _, _, l, _ := runtime.Caller(1)
_, _, l2, _ := runtime.Caller(2) _, _, l2, _ := runtime.Caller(2)
return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { return func(t *testing.T, sched *Scheduler, index *stores.Index, rm *runMeta) {
select { select {
case rm.done[name] <- struct{}{}: case rm.done[name] <- struct{}{}:
t.Fatal("not expected", l, l2) t.Fatal("not expected", l, l2)
@ -378,7 +378,7 @@ func TestSched(t *testing.T) {
} }
multTask := func(tasks ...task) task { multTask := func(tasks ...task) task {
return func(t *testing.T, s *scheduler, index *stores.Index, meta *runMeta) { return func(t *testing.T, s *Scheduler, index *stores.Index, meta *runMeta) {
for _, tsk := range tasks { for _, tsk := range tasks {
tsk(t, s, index, meta) tsk(t, s, index, meta)
} }
@ -492,7 +492,7 @@ func TestSched(t *testing.T) {
} }
diag := func() task { diag := func() task {
return func(t *testing.T, s *scheduler, index *stores.Index, meta *runMeta) { return func(t *testing.T, s *Scheduler, index *stores.Index, meta *runMeta) {
time.Sleep(20 * time.Millisecond) time.Sleep(20 * time.Millisecond)
for _, request := range s.diag().Requests { for _, request := range s.diag().Requests {
log.Infof("!!! sDIAG: sid(%d) task(%s)", request.Sector.Number, request.TaskType) log.Infof("!!! sDIAG: sid(%d) task(%s)", request.Sector.Number, request.TaskType)
@ -582,12 +582,12 @@ func TestSched(t *testing.T) {
type slowishSelector bool type slowishSelector bool
func (s slowishSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *workerHandle) (bool, error) { func (s slowishSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *WorkerHandle) (bool, error) {
time.Sleep(200 * time.Microsecond) time.Sleep(200 * time.Microsecond)
return bool(s), nil return bool(s), nil
} }
func (s slowishSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *workerHandle) (bool, error) { func (s slowishSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) {
time.Sleep(100 * time.Microsecond) time.Sleep(100 * time.Microsecond)
return true, nil return true, nil
} }
@ -605,9 +605,9 @@ func BenchmarkTrySched(b *testing.B) {
b.StopTimer() b.StopTimer()
sched := newScheduler() sched := newScheduler()
sched.workers[storiface.WorkerID{}] = &workerHandle{ sched.Workers[storiface.WorkerID{}] = &WorkerHandle{
workerRpc: nil, workerRpc: nil,
info: storiface.WorkerInfo{ Info: storiface.WorkerInfo{
Hostname: "t", Hostname: "t",
Resources: decentWorkerResources, Resources: decentWorkerResources,
}, },
@ -616,17 +616,17 @@ func BenchmarkTrySched(b *testing.B) {
} }
for i := 0; i < windows; i++ { for i := 0; i < windows; i++ {
sched.openWindows = append(sched.openWindows, &schedWindowRequest{ sched.OpenWindows = append(sched.OpenWindows, &SchedWindowRequest{
worker: storiface.WorkerID{}, Worker: storiface.WorkerID{},
done: make(chan *schedWindow, 1000), Done: make(chan *SchedWindow, 1000),
}) })
} }
for i := 0; i < queue; i++ { for i := 0; i < queue; i++ {
sched.schedQueue.Push(&workerRequest{ sched.SchedQueue.Push(&WorkerRequest{
taskType: sealtasks.TTCommit2, TaskType: sealtasks.TTCommit2,
sel: slowishSelector(true), Sel: slowishSelector(true),
ctx: ctx, Ctx: ctx,
}) })
} }
@ -644,26 +644,26 @@ func BenchmarkTrySched(b *testing.B) {
} }
func TestWindowCompact(t *testing.T) { func TestWindowCompact(t *testing.T) {
sh := scheduler{} sh := Scheduler{}
spt := abi.RegisteredSealProof_StackedDrg32GiBV1 spt := abi.RegisteredSealProof_StackedDrg32GiBV1
test := func(start [][]sealtasks.TaskType, expect [][]sealtasks.TaskType) func(t *testing.T) { test := func(start [][]sealtasks.TaskType, expect [][]sealtasks.TaskType) func(t *testing.T) {
return func(t *testing.T) { return func(t *testing.T) {
wh := &workerHandle{ wh := &WorkerHandle{
info: storiface.WorkerInfo{ Info: storiface.WorkerInfo{
Resources: decentWorkerResources, Resources: decentWorkerResources,
}, },
} }
for _, windowTasks := range start { for _, windowTasks := range start {
window := &schedWindow{} window := &SchedWindow{}
for _, task := range windowTasks { for _, task := range windowTasks {
window.todo = append(window.todo, &workerRequest{ window.Todo = append(window.Todo, &WorkerRequest{
taskType: task, TaskType: task,
sector: storage.SectorRef{ProofType: spt}, Sector: storage.SectorRef{ProofType: spt},
}) })
window.allocated.add(wh.info.Resources, storiface.ResourceTable[task][spt]) window.Allocated.Add(wh.Info.Resources, storiface.ResourceTable[task][spt])
} }
wh.activeWindows = append(wh.activeWindows, window) wh.activeWindows = append(wh.activeWindows, window)
@ -681,14 +681,14 @@ func TestWindowCompact(t *testing.T) {
var expectRes activeResources var expectRes activeResources
for ti, task := range tasks { for ti, task := range tasks {
require.Equal(t, task, wh.activeWindows[wi].todo[ti].taskType, "%d, %d", wi, ti) require.Equal(t, task, wh.activeWindows[wi].Todo[ti].TaskType, "%d, %d", wi, ti)
expectRes.add(wh.info.Resources, storiface.ResourceTable[task][spt]) expectRes.Add(wh.Info.Resources, storiface.ResourceTable[task][spt])
} }
require.Equal(t, expectRes.cpuUse, wh.activeWindows[wi].allocated.cpuUse, "%d", wi) require.Equal(t, expectRes.cpuUse, wh.activeWindows[wi].Allocated.cpuUse, "%d", wi)
require.Equal(t, expectRes.gpuUsed, wh.activeWindows[wi].allocated.gpuUsed, "%d", wi) require.Equal(t, expectRes.gpuUsed, wh.activeWindows[wi].Allocated.gpuUsed, "%d", wi)
require.Equal(t, expectRes.memUsedMin, wh.activeWindows[wi].allocated.memUsedMin, "%d", wi) require.Equal(t, expectRes.memUsedMin, wh.activeWindows[wi].Allocated.memUsedMin, "%d", wi)
require.Equal(t, expectRes.memUsedMax, wh.activeWindows[wi].allocated.memUsedMax, "%d", wi) require.Equal(t, expectRes.memUsedMax, wh.activeWindows[wi].Allocated.memUsedMax, "%d", wi)
} }
} }


@ -12,31 +12,31 @@ import (
) )
type schedWorker struct { type schedWorker struct {
sched *scheduler sched *Scheduler
worker *workerHandle worker *WorkerHandle
wid storiface.WorkerID wid storiface.WorkerID
heartbeatTimer *time.Ticker heartbeatTimer *time.Ticker
scheduledWindows chan *schedWindow scheduledWindows chan *SchedWindow
taskDone chan struct{} taskDone chan struct{}
windowsRequested int windowsRequested int
} }
func newWorkerHandle(ctx context.Context, w Worker) (*workerHandle, error) { func newWorkerHandle(ctx context.Context, w Worker) (*WorkerHandle, error) {
info, err := w.Info(ctx) info, err := w.Info(ctx)
if err != nil { if err != nil {
return nil, xerrors.Errorf("getting worker info: %w", err) return nil, xerrors.Errorf("getting worker info: %w", err)
} }
worker := &workerHandle{ worker := &WorkerHandle{
workerRpc: w, workerRpc: w,
info: info, Info: info,
preparing: &activeResources{}, preparing: &activeResources{},
active: &activeResources{}, active: &activeResources{},
enabled: true, Enabled: true,
closingMgr: make(chan struct{}), closingMgr: make(chan struct{}),
closedMgr: make(chan struct{}), closedMgr: make(chan struct{}),
@ -46,9 +46,9 @@ func newWorkerHandle(ctx context.Context, w Worker) (*workerHandle, error) {
} }
// context only used for startup // context only used for startup
func (sh *scheduler) runWorker(ctx context.Context, wid storiface.WorkerID, worker *workerHandle) error { func (sh *Scheduler) runWorker(ctx context.Context, wid storiface.WorkerID, worker *WorkerHandle) error {
sh.workersLk.Lock() sh.workersLk.Lock()
_, exist := sh.workers[wid] _, exist := sh.Workers[wid]
if exist { if exist {
log.Warnw("duplicated worker added", "id", wid) log.Warnw("duplicated worker added", "id", wid)
@ -57,7 +57,7 @@ func (sh *scheduler) runWorker(ctx context.Context, wid storiface.WorkerID, work
return nil return nil
} }
sh.workers[wid] = worker sh.Workers[wid] = worker
sh.workersLk.Unlock() sh.workersLk.Unlock()
sw := &schedWorker{ sw := &schedWorker{
@ -67,7 +67,7 @@ func (sh *scheduler) runWorker(ctx context.Context, wid storiface.WorkerID, work
wid: wid, wid: wid,
heartbeatTimer: time.NewTicker(stores.HeartbeatInterval), heartbeatTimer: time.NewTicker(stores.HeartbeatInterval),
scheduledWindows: make(chan *schedWindow, SchedWindows), scheduledWindows: make(chan *SchedWindow, SchedWindows),
taskDone: make(chan struct{}, 1), taskDone: make(chan struct{}, 1),
windowsRequested: 0, windowsRequested: 0,
@ -94,7 +94,7 @@ func (sw *schedWorker) handleWorker() {
} }
sched.workersLk.Lock() sched.workersLk.Lock()
delete(sched.workers, sw.wid) delete(sched.Workers, sw.wid)
sched.workersLk.Unlock() sched.workersLk.Unlock()
}() }()
@ -103,7 +103,7 @@ func (sw *schedWorker) handleWorker() {
for { for {
{ {
sched.workersLk.Lock() sched.workersLk.Lock()
enabled := worker.enabled enabled := worker.Enabled
sched.workersLk.Unlock() sched.workersLk.Unlock()
// ask for more windows if we need them (non-blocking) // ask for more windows if we need them (non-blocking)
@ -124,8 +124,8 @@ func (sw *schedWorker) handleWorker() {
// session looks good // session looks good
{ {
sched.workersLk.Lock() sched.workersLk.Lock()
enabled := worker.enabled enabled := worker.Enabled
worker.enabled = true worker.Enabled = true
sched.workersLk.Unlock() sched.workersLk.Unlock()
if !enabled { if !enabled {
@ -248,9 +248,9 @@ func (sw *schedWorker) checkSession(ctx context.Context) bool {
func (sw *schedWorker) requestWindows() bool { func (sw *schedWorker) requestWindows() bool {
for ; sw.windowsRequested < SchedWindows; sw.windowsRequested++ { for ; sw.windowsRequested < SchedWindows; sw.windowsRequested++ {
select { select {
case sw.sched.windowRequests <- &schedWindowRequest{ case sw.sched.windowRequests <- &SchedWindowRequest{
worker: sw.wid, Worker: sw.wid,
done: sw.scheduledWindows, Done: sw.scheduledWindows,
}: }:
case <-sw.sched.closing: case <-sw.sched.closing:
return false return false
@ -290,21 +290,21 @@ func (sw *schedWorker) workerCompactWindows() {
lower := worker.activeWindows[wi] lower := worker.activeWindows[wi]
var moved []int var moved []int
for ti, todo := range window.todo { for ti, todo := range window.Todo {
needRes := worker.info.Resources.ResourceSpec(todo.sector.ProofType, todo.taskType) needRes := worker.Info.Resources.ResourceSpec(todo.Sector.ProofType, todo.TaskType)
if !lower.allocated.canHandleRequest(needRes, sw.wid, "compactWindows", worker.info) { if !lower.Allocated.CanHandleRequest(needRes, sw.wid, "compactWindows", worker.Info) {
continue continue
} }
moved = append(moved, ti) moved = append(moved, ti)
lower.todo = append(lower.todo, todo) lower.Todo = append(lower.Todo, todo)
lower.allocated.add(worker.info.Resources, needRes) lower.Allocated.Add(worker.Info.Resources, needRes)
window.allocated.free(worker.info.Resources, needRes) window.Allocated.free(worker.Info.Resources, needRes)
} }
if len(moved) > 0 { if len(moved) > 0 {
newTodo := make([]*workerRequest, 0, len(window.todo)-len(moved)) newTodo := make([]*WorkerRequest, 0, len(window.Todo)-len(moved))
for i, t := range window.todo { for i, t := range window.Todo {
if len(moved) > 0 && moved[0] == i { if len(moved) > 0 && moved[0] == i {
moved = moved[1:] moved = moved[1:]
continue continue
@ -312,16 +312,16 @@ func (sw *schedWorker) workerCompactWindows() {
newTodo = append(newTodo, t) newTodo = append(newTodo, t)
} }
window.todo = newTodo window.Todo = newTodo
} }
} }
} }
var compacted int var compacted int
var newWindows []*schedWindow var newWindows []*SchedWindow
for _, window := range worker.activeWindows { for _, window := range worker.activeWindows {
if len(window.todo) == 0 { if len(window.Todo) == 0 {
compacted++ compacted++
continue continue
} }
@ -347,13 +347,13 @@ assignLoop:
firstWindow := worker.activeWindows[0] firstWindow := worker.activeWindows[0]
// process tasks within a window, preferring tasks at lower indexes // process tasks within a window, preferring tasks at lower indexes
for len(firstWindow.todo) > 0 { for len(firstWindow.Todo) > 0 {
tidx := -1 tidx := -1
worker.lk.Lock() worker.lk.Lock()
for t, todo := range firstWindow.todo { for t, todo := range firstWindow.Todo {
needRes := worker.info.Resources.ResourceSpec(todo.sector.ProofType, todo.taskType) needRes := worker.Info.Resources.ResourceSpec(todo.Sector.ProofType, todo.TaskType)
if worker.preparing.canHandleRequest(needRes, sw.wid, "startPreparing", worker.info) { if worker.preparing.CanHandleRequest(needRes, sw.wid, "startPreparing", worker.Info) {
tidx = t tidx = t
break break
} }
@ -364,9 +364,9 @@ assignLoop:
break assignLoop break assignLoop
} }
todo := firstWindow.todo[tidx] todo := firstWindow.Todo[tidx]
log.Debugf("assign worker sector %d", todo.sector.ID.Number) log.Debugf("assign worker sector %d", todo.Sector.ID.Number)
err := sw.startProcessingTask(todo) err := sw.startProcessingTask(todo)
if err != nil { if err != nil {
@ -375,9 +375,9 @@ assignLoop:
} }
// Note: we're not freeing window.allocated resources here very much on purpose // Note: we're not freeing window.allocated resources here very much on purpose
copy(firstWindow.todo[tidx:], firstWindow.todo[tidx+1:]) copy(firstWindow.Todo[tidx:], firstWindow.Todo[tidx+1:])
firstWindow.todo[len(firstWindow.todo)-1] = nil firstWindow.Todo[len(firstWindow.Todo)-1] = nil
firstWindow.todo = firstWindow.todo[:len(firstWindow.todo)-1] firstWindow.Todo = firstWindow.Todo[:len(firstWindow.Todo)-1]
} }
copy(worker.activeWindows, worker.activeWindows[1:]) copy(worker.activeWindows, worker.activeWindows[1:])
@ -405,16 +405,16 @@ assignLoop:
firstWindow := worker.activeWindows[0] firstWindow := worker.activeWindows[0]
// process tasks within a window, preferring tasks at lower indexes // process tasks within a window, preferring tasks at lower indexes
for len(firstWindow.todo) > 0 { for len(firstWindow.Todo) > 0 {
tidx := -1 tidx := -1
for t, todo := range firstWindow.todo { for t, todo := range firstWindow.Todo {
if todo.taskType != sealtasks.TTCommit1 && todo.taskType != sealtasks.TTCommit2 { // todo put in task if todo.TaskType != sealtasks.TTCommit1 && todo.TaskType != sealtasks.TTCommit2 { // todo put in task
continue continue
} }
needRes := storiface.ResourceTable[todo.taskType][todo.sector.ProofType] needRes := storiface.ResourceTable[todo.TaskType][todo.Sector.ProofType]
if worker.active.canHandleRequest(needRes, sw.wid, "startPreparing", worker.info) { if worker.active.CanHandleRequest(needRes, sw.wid, "startPreparing", worker.Info) {
tidx = t tidx = t
break break
} }
@ -424,9 +424,9 @@ assignLoop:
break assignLoop break assignLoop
} }
todo := firstWindow.todo[tidx] todo := firstWindow.Todo[tidx]
log.Debugf("assign worker sector %d (ready)", todo.sector.ID.Number) log.Debugf("assign worker sector %d (ready)", todo.Sector.ID.Number)
err := sw.startProcessingReadyTask(todo) err := sw.startProcessingReadyTask(todo)
if err != nil { if err != nil {
@ -435,9 +435,9 @@ assignLoop:
} }
// Note: we're not freeing window.allocated resources here very much on purpose // Note: we're not freeing window.allocated resources here very much on purpose
copy(firstWindow.todo[tidx:], firstWindow.todo[tidx+1:]) copy(firstWindow.Todo[tidx:], firstWindow.Todo[tidx+1:])
firstWindow.todo[len(firstWindow.todo)-1] = nil firstWindow.Todo[len(firstWindow.Todo)-1] = nil
firstWindow.todo = firstWindow.todo[:len(firstWindow.todo)-1] firstWindow.Todo = firstWindow.Todo[:len(firstWindow.Todo)-1]
} }
copy(worker.activeWindows, worker.activeWindows[1:]) copy(worker.activeWindows, worker.activeWindows[1:])
@ -448,24 +448,24 @@ assignLoop:
} }
} }
func (sw *schedWorker) startProcessingTask(req *workerRequest) error { func (sw *schedWorker) startProcessingTask(req *WorkerRequest) error {
w, sh := sw.worker, sw.sched w, sh := sw.worker, sw.sched
needRes := w.info.Resources.ResourceSpec(req.sector.ProofType, req.taskType) needRes := w.Info.Resources.ResourceSpec(req.Sector.ProofType, req.TaskType)
w.lk.Lock() w.lk.Lock()
w.preparing.add(w.info.Resources, needRes) w.preparing.Add(w.Info.Resources, needRes)
w.lk.Unlock() w.lk.Unlock()
go func() { go func() {
// first run the prepare step (e.g. fetching sector data from other worker) // first run the prepare step (e.g. fetching sector data from other worker)
tw := sh.workTracker.worker(sw.wid, w.info, w.workerRpc) tw := sh.workTracker.worker(sw.wid, w.Info, w.workerRpc)
tw.start() tw.start()
err := req.prepare(req.ctx, tw) err := req.prepare(req.Ctx, tw)
w.lk.Lock() w.lk.Lock()
if err != nil { if err != nil {
w.preparing.free(w.info.Resources, needRes) w.preparing.free(w.Info.Resources, needRes)
w.lk.Unlock() w.lk.Unlock()
select { select {
@ -477,7 +477,7 @@ func (sw *schedWorker) startProcessingTask(req *workerRequest) error {
select { select {
case req.ret <- workerResponse{err: err}: case req.ret <- workerResponse{err: err}:
case <-req.ctx.Done(): case <-req.Ctx.Done():
log.Warnf("request got cancelled before we could respond (prepare error: %+v)", err) log.Warnf("request got cancelled before we could respond (prepare error: %+v)", err)
case <-sh.closing: case <-sh.closing:
log.Warnf("scheduler closed while sending response (prepare error: %+v)", err) log.Warnf("scheduler closed while sending response (prepare error: %+v)", err)
@ -485,17 +485,17 @@ func (sw *schedWorker) startProcessingTask(req *workerRequest) error {
return return
} }
tw = sh.workTracker.worker(sw.wid, w.info, w.workerRpc) tw = sh.workTracker.worker(sw.wid, w.Info, w.workerRpc)
// start tracking work first early in case we need to wait for resources // start tracking work first early in case we need to wait for resources
werr := make(chan error, 1) werr := make(chan error, 1)
go func() { go func() {
werr <- req.work(req.ctx, tw) werr <- req.work(req.Ctx, tw)
}() }()
// wait (if needed) for resources in the 'active' window // wait (if needed) for resources in the 'active' window
err = w.active.withResources(sw.wid, w.info, needRes, &w.lk, func() error { err = w.active.withResources(sw.wid, w.Info, needRes, &w.lk, func() error {
w.preparing.free(w.info.Resources, needRes) w.preparing.free(w.Info.Resources, needRes)
w.lk.Unlock() w.lk.Unlock()
defer w.lk.Lock() // we MUST return locked from this function defer w.lk.Lock() // we MUST return locked from this function
@ -511,7 +511,7 @@ func (sw *schedWorker) startProcessingTask(req *workerRequest) error {
select { select {
case req.ret <- workerResponse{err: err}: case req.ret <- workerResponse{err: err}:
case <-req.ctx.Done(): case <-req.Ctx.Done():
log.Warnf("request got cancelled before we could respond") log.Warnf("request got cancelled before we could respond")
case <-sh.closing: case <-sh.closing:
log.Warnf("scheduler closed while sending response") log.Warnf("scheduler closed while sending response")
@ -531,22 +531,22 @@ func (sw *schedWorker) startProcessingTask(req *workerRequest) error {
return nil return nil
} }
func (sw *schedWorker) startProcessingReadyTask(req *workerRequest) error { func (sw *schedWorker) startProcessingReadyTask(req *WorkerRequest) error {
w, sh := sw.worker, sw.sched w, sh := sw.worker, sw.sched
needRes := w.info.Resources.ResourceSpec(req.sector.ProofType, req.taskType) needRes := w.Info.Resources.ResourceSpec(req.Sector.ProofType, req.TaskType)
w.active.add(w.info.Resources, needRes) w.active.Add(w.Info.Resources, needRes)
go func() { go func() {
// Do the work! // Do the work!
tw := sh.workTracker.worker(sw.wid, w.info, w.workerRpc) tw := sh.workTracker.worker(sw.wid, w.Info, w.workerRpc)
tw.start() tw.start()
err := req.work(req.ctx, tw) err := req.work(req.Ctx, tw)
select { select {
case req.ret <- workerResponse{err: err}: case req.ret <- workerResponse{err: err}:
case <-req.ctx.Done(): case <-req.Ctx.Done():
log.Warnf("request got cancelled before we could respond") log.Warnf("request got cancelled before we could respond")
case <-sh.closing: case <-sh.closing:
log.Warnf("scheduler closed while sending response") log.Warnf("scheduler closed while sending response")
@ -554,7 +554,7 @@ func (sw *schedWorker) startProcessingReadyTask(req *workerRequest) error {
w.lk.Lock() w.lk.Lock()
w.active.free(w.info.Resources, needRes) w.active.free(w.Info.Resources, needRes)
select { select {
case sw.taskDone <- struct{}{}: case sw.taskDone <- struct{}{}:
@ -574,7 +574,7 @@ func (sw *schedWorker) startProcessingReadyTask(req *workerRequest) error {
return nil return nil
} }
func (sh *scheduler) workerCleanup(wid storiface.WorkerID, w *workerHandle) { func (sh *Scheduler) workerCleanup(wid storiface.WorkerID, w *WorkerHandle) {
select { select {
case <-w.closingMgr: case <-w.closingMgr:
default: default:
@ -592,13 +592,13 @@ func (sh *scheduler) workerCleanup(wid storiface.WorkerID, w *workerHandle) {
if !w.cleanupStarted { if !w.cleanupStarted {
w.cleanupStarted = true w.cleanupStarted = true
newWindows := make([]*schedWindowRequest, 0, len(sh.openWindows)) newWindows := make([]*SchedWindowRequest, 0, len(sh.OpenWindows))
for _, window := range sh.openWindows { for _, window := range sh.OpenWindows {
if window.worker != wid { if window.Worker != wid {
newWindows = append(newWindows, window) newWindows = append(newWindows, window)
} }
} }
sh.openWindows = newWindows sh.OpenWindows = newWindows
log.Debugf("worker %s dropped", wid) log.Debugf("worker %s dropped", wid)
} }
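The two start-processing paths above share one resource-accounting pattern: a reservation is taken in the preparing table while sector data is fetched, then moved into the active table before the heavy part of the task runs, and freed when it finishes. The standalone Go sketch below illustrates only that flow; resources, worker and processTask are simplified stand-ins invented for the example, not the real storiface types or the channel/withResources machinery the scheduler actually uses.

package main

import (
	"fmt"
	"sync"
)

// resources is a simplified stand-in for the scheduler's per-worker resource
// tables, which in lotus track memory, CPU and GPU use per task type.
type resources struct {
	used int
}

func (r *resources) add(n int)  { r.used += n }
func (r *resources) free(n int) { r.used -= n }

type worker struct {
	lk        sync.Mutex
	preparing resources // reservations held while fetching sector data
	active    resources // reservations held while the task itself runs
}

// processTask mirrors the shape of startProcessingTask: reserve in preparing,
// run the prepare step, move the reservation into active, run the work, free.
func (w *worker) processTask(need int, prepare, work func() error) error {
	w.lk.Lock()
	w.preparing.add(need)
	w.lk.Unlock()

	if err := prepare(); err != nil {
		w.lk.Lock()
		w.preparing.free(need)
		w.lk.Unlock()
		return err
	}

	// Move the reservation from the preparing table to the active table.
	w.lk.Lock()
	w.preparing.free(need)
	w.active.add(need)
	w.lk.Unlock()

	defer func() {
		w.lk.Lock()
		w.active.free(need)
		w.lk.Unlock()
	}()

	return work()
}

func main() {
	w := &worker{}
	err := w.processTask(4,
		func() error { fmt.Println("prepare: fetch sector data"); return nil },
		func() error { fmt.Println("work: run the sealing task"); return nil },
	)
	fmt.Println("done, err =", err)
}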


@ -26,7 +26,7 @@ func newAllocSelector(index stores.SectorIndex, alloc storiface.SectorFileType,
} }
} }
func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) { func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, error) {
tasks, err := whnd.TaskTypes(ctx) tasks, err := whnd.TaskTypes(ctx)
if err != nil { if err != nil {
return false, xerrors.Errorf("getting supported worker task types: %w", err) return false, xerrors.Errorf("getting supported worker task types: %w", err)
@ -64,8 +64,8 @@ func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi
return false, nil return false, nil
} }
func (s *allocSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *workerHandle) (bool, error) { func (s *allocSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) {
return a.utilization() < b.utilization(), nil return a.Utilization() < b.Utilization(), nil
} }
var _ WorkerSelector = &allocSelector{} var _ WorkerSelector = &allocSelector{}
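allocSelector now takes and compares the exported *WorkerHandle. The WorkerSelector interface itself is not part of this hunk, but its shape can be read off the Ok/Cmp signatures above; the fragment below is that inferred definition. The package name and import paths are assumptions based on the lotus tree this diff targets, and the real declaration elsewhere in the package may differ in detail.

package sectorstorage

import (
	"context"

	"github.com/filecoin-project/go-state-types/abi"

	"github.com/filecoin-project/lotus/extern/sector-storage/sealtasks"
)

// WorkerSelector as inferred from the selector methods in this diff; the
// actual declaration lives elsewhere in the package.
type WorkerSelector interface {
	// Ok reports whether the worker behind whnd can take the given task
	// for the given seal proof type.
	Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, error)

	// Cmp reports whether worker a should be preferred over worker b for the task.
	Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error)
}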


@ -28,7 +28,7 @@ func newExistingSelector(index stores.SectorIndex, sector abi.SectorID, alloc st
} }
} }
func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) { func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, error) {
tasks, err := whnd.TaskTypes(ctx) tasks, err := whnd.TaskTypes(ctx)
if err != nil { if err != nil {
return false, xerrors.Errorf("getting supported worker task types: %w", err) return false, xerrors.Errorf("getting supported worker task types: %w", err)
@ -66,8 +66,8 @@ func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt
return false, nil return false, nil
} }
func (s *existingSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *workerHandle) (bool, error) { func (s *existingSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) {
return a.utilization() < b.utilization(), nil return a.Utilization() < b.Utilization(), nil
} }
var _ WorkerSelector = &existingSelector{} var _ WorkerSelector = &existingSelector{}
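Each selector file ends with a guard of the form var _ WorkerSelector = &existingSelector{}. For readers unfamiliar with the idiom, the minimal standalone example below (with made-up selector and allocLike types) shows what such a declaration does: it forces a compile-time check that the type satisfies the interface, at zero runtime cost.

package main

// selector and allocLike are made-up stand-ins for the example.
type selector interface {
	Ok() bool
}

type allocLike struct{}

func (allocLike) Ok() bool { return true }

// Compile-time interface guard: if allocLike ever stops satisfying selector,
// this line fails to build. It has no runtime cost.
var _ selector = allocLike{}

func main() {}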


@ -19,7 +19,7 @@ func newTaskSelector() *taskSelector {
return &taskSelector{} return &taskSelector{}
} }
func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) { func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, error) {
tasks, err := whnd.TaskTypes(ctx) tasks, err := whnd.TaskTypes(ctx)
if err != nil { if err != nil {
return false, xerrors.Errorf("getting supported worker task types: %w", err) return false, xerrors.Errorf("getting supported worker task types: %w", err)
@ -29,7 +29,7 @@ func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.
return supported, nil return supported, nil
} }
func (s *taskSelector) Cmp(ctx context.Context, _ sealtasks.TaskType, a, b *workerHandle) (bool, error) { func (s *taskSelector) Cmp(ctx context.Context, _ sealtasks.TaskType, a, b *WorkerHandle) (bool, error) {
atasks, err := a.TaskTypes(ctx) atasks, err := a.TaskTypes(ctx)
if err != nil { if err != nil {
return false, xerrors.Errorf("getting supported worker task types: %w", err) return false, xerrors.Errorf("getting supported worker task types: %w", err)
@ -43,7 +43,7 @@ func (s *taskSelector) Cmp(ctx context.Context, _ sealtasks.TaskType, a, b *work
return len(atasks) < len(btasks), nil // prefer workers which can do less return len(atasks) < len(btasks), nil // prefer workers which can do less
} }
return a.utilization() < b.utilization(), nil return a.Utilization() < b.Utilization(), nil
} }
var _ WorkerSelector = &taskSelector{} var _ WorkerSelector = &taskSelector{}
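taskSelector's Cmp prefers the worker that supports fewer task types, falling back to utilization. How Ok and Cmp compose is up to the caller, and the assignment logic this commit moves behind an interface is considerably more involved; the pickWorker helper below is purely hypothetical, a package-level sketch of filtering candidates with Ok and keeping whichever one Cmp prefers. Import paths are assumed to match the tree at this commit.

package sectorstorage

import (
	"context"

	"github.com/filecoin-project/go-state-types/abi"

	"github.com/filecoin-project/lotus/extern/sector-storage/sealtasks"
	"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
)

// pickWorker is a hypothetical helper (not part of this commit) showing how a
// WorkerSelector's Ok and Cmp compose: filter with Ok, then keep the candidate
// that Cmp prefers.
func pickWorker(ctx context.Context, sel WorkerSelector, task sealtasks.TaskType,
	spt abi.RegisteredSealProof, workers map[storiface.WorkerID]*WorkerHandle) (storiface.WorkerID, bool) {

	var (
		bestID storiface.WorkerID
		best   *WorkerHandle
	)

	for id, whnd := range workers {
		ok, err := sel.Ok(ctx, task, spt, whnd)
		if err != nil || !ok {
			continue // worker can't (or failed to report whether it can) run this task
		}
		if best == nil {
			bestID, best = id, whnd
			continue
		}
		// Cmp(a, b) == true means a should be preferred over b.
		if preferred, err := sel.Cmp(ctx, task, whnd, best); err == nil && preferred {
			bestID, best = id, whnd
		}
	}

	return bestID, best != nil
}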


@ -15,7 +15,7 @@ func (m *Manager) WorkerStats(ctx context.Context) map[uuid.UUID]storiface.Worke
out := map[uuid.UUID]storiface.WorkerStats{} out := map[uuid.UUID]storiface.WorkerStats{}
cb := func(ctx context.Context, id storiface.WorkerID, handle *workerHandle) { cb := func(ctx context.Context, id storiface.WorkerID, handle *WorkerHandle) {
handle.lk.Lock() handle.lk.Lock()
ctx, cancel := context.WithTimeout(ctx, 3*time.Second) ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
@ -32,9 +32,9 @@ func (m *Manager) WorkerStats(ctx context.Context) map[uuid.UUID]storiface.Worke
} }
out[uuid.UUID(id)] = storiface.WorkerStats{ out[uuid.UUID(id)] = storiface.WorkerStats{
Info: handle.info, Info: handle.Info,
Tasks: taskList, Tasks: taskList,
Enabled: handle.enabled, Enabled: handle.Enabled,
MemUsedMin: handle.active.memUsedMin, MemUsedMin: handle.active.memUsedMin,
MemUsedMax: handle.active.memUsedMax, MemUsedMax: handle.active.memUsedMax,
GpuUsed: handle.active.gpuUsed, GpuUsed: handle.active.gpuUsed,
@ -43,7 +43,7 @@ func (m *Manager) WorkerStats(ctx context.Context) map[uuid.UUID]storiface.Worke
handle.lk.Unlock() handle.lk.Unlock()
} }
for id, handle := range m.sched.workers { for id, handle := range m.sched.Workers {
cb(ctx, id, handle) cb(ctx, id, handle)
} }
@ -72,14 +72,14 @@ func (m *Manager) WorkerJobs() map[uuid.UUID][]storiface.WorkerJob {
m.sched.workersLk.RLock() m.sched.workersLk.RLock()
for id, handle := range m.sched.workers { for id, handle := range m.sched.Workers {
handle.wndLk.Lock() handle.wndLk.Lock()
for wi, window := range handle.activeWindows { for wi, window := range handle.activeWindows {
for _, request := range window.todo { for _, request := range window.Todo {
out[uuid.UUID(id)] = append(out[uuid.UUID(id)], storiface.WorkerJob{ out[uuid.UUID(id)] = append(out[uuid.UUID(id)], storiface.WorkerJob{
ID: storiface.UndefCall, ID: storiface.UndefCall,
Sector: request.sector.ID, Sector: request.Sector.ID,
Task: request.taskType, Task: request.TaskType,
RunWait: wi + 2, RunWait: wi + 2,
Start: request.start, Start: request.start,
}) })