Merge pull request #8447 from filecoin-project/feat/sched-better

feat: sched: Improve worker assigning logic
commit 7900479e9c
Łukasz Magiera, 2022-04-09 20:50:31 +02:00 (committed by GitHub)
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
5 changed files with 90 additions and 19 deletions


@@ -2,6 +2,7 @@ package sectorstorage
 
 import (
     "context"
+    "math"
     "math/rand"
     "sort"
     "sync"
@@ -76,6 +77,10 @@ type scheduler struct {
 type workerHandle struct {
     workerRpc Worker
 
+    tasksCache  map[sealtasks.TaskType]struct{}
+    tasksUpdate time.Time
+    tasksLk     sync.Mutex
+
     info storiface.WorkerInfo
 
     preparing *activeResources // use with workerHandle.lk
@@ -361,7 +366,7 @@ func (sh *scheduler) trySched() {
     }
 
     windows := make([]schedWindow, windowsLen)
-    acceptableWindows := make([][]int, queueLen)
+    acceptableWindows := make([][]int, queueLen) // QueueIndex -> []OpenWindowIndex
 
     // Step 1
     throttle := make(chan struct{}, windowsLen)
@@ -455,41 +460,73 @@ func (sh *scheduler) trySched() {
     // Step 2
     scheduled := 0
     rmQueue := make([]int, 0, queueLen)
+    workerUtil := map[storiface.WorkerID]float64{}
 
     for sqi := 0; sqi < queueLen; sqi++ {
         task := (*sh.schedQueue)[sqi]
 
         selectedWindow := -1
-        for _, wnd := range acceptableWindows[task.indexHeap] {
+        var needRes storiface.Resources
+        var info storiface.WorkerInfo
+        var bestWid storiface.WorkerID
+        bestUtilization := math.MaxFloat64 // smaller = better
+
+        for i, wnd := range acceptableWindows[task.indexHeap] {
             wid := sh.openWindows[wnd].worker
-            info := sh.workers[wid].info
+            w := sh.workers[wid]
 
-            log.Debugf("SCHED try assign sqi:%d sector %d to window %d", sqi, task.sector.ID.Number, wnd)
+            res := info.Resources.ResourceSpec(task.sector.ProofType, task.taskType)
 
-            needRes := info.Resources.ResourceSpec(task.sector.ProofType, task.taskType)
+            log.Debugf("SCHED try assign sqi:%d sector %d to window %d (awi:%d)", sqi, task.sector.ID.Number, wnd, i)
 
             // TODO: allow bigger windows
             if !windows[wnd].allocated.canHandleRequest(needRes, wid, "schedAssign", info) {
                 continue
             }
 
-            log.Debugf("SCHED ASSIGNED sqi:%d sector %d task %s to window %d", sqi, task.sector.ID.Number, task.taskType, wnd)
-
-            windows[wnd].allocated.add(info.Resources, needRes)
-            // TODO: We probably want to re-sort acceptableWindows here based on new
-            // workerHandle.utilization + windows[wnd].allocated.utilization (workerHandle.utilization is used in all
-            // task selectors, but not in the same way, so need to figure out how to do that in a non-O(n^2 way), and
-            // without additional network roundtrips (O(n^2) could be avoided by turning acceptableWindows.[] into heaps))
-
-            selectedWindow = wnd
-            break
-        }
+            wu, found := workerUtil[wid]
+            if !found {
+                wu = w.utilization()
+                workerUtil[wid] = wu
+            }
+            if wu >= bestUtilization {
+                // acceptable worker list is initially sorted by utilization, and the initially-best workers
+                // will be assigned tasks first. This means that if we find a worker which isn't better, it
+                // probably means that the other workers aren't better either.
+                //
+                // utilization
+                // ^
+                // |       /
+                // | \    /
+                // |  \  /
+                // |   *
+                // #--------> acceptableWindow index
+                //
+                // * -> we're here
+                break
+            }
+
+            info = w.info
+            needRes = res
+            bestWid = wid
+            selectedWindow = wnd
+            bestUtilization = wu
+        }
 
         if selectedWindow < 0 {
             // all windows full
             continue
         }
 
+        log.Debugw("SCHED ASSIGNED",
+            "sqi", sqi,
+            "sector", task.sector.ID.Number,
+            "task", task.taskType,
+            "window", selectedWindow,
+            "worker", bestWid,
+            "utilization", bestUtilization)
+
+        workerUtil[bestWid] += windows[selectedWindow].allocated.add(info.Resources, needRes)
         windows[selectedWindow].todo = append(windows[selectedWindow].todo, task)
         rmQueue = append(rmQueue, sqi)
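
The heart of the change is in Step 2 above: instead of taking the first window that can fit the task, the scheduler now scans every acceptable window, looks up the owning worker's utilization (cached in the new workerUtil map and bumped by the value returned from activeResources.add), and assigns the task to the least-utilized worker, breaking out early once candidates stop improving. A minimal, self-contained sketch of that selection rule, where window, pickWindow and their fields are illustrative stand-ins rather than the Lotus scheduler types:

package main

import (
    "fmt"
    "math"
)

// window is a simplified stand-in for an open scheduling window plus the
// state the real code reads from sh.workers and windows[wnd].allocated.
type window struct {
    worker      int     // owning worker ID
    utilization float64 // 0..1, smaller = more idle
    canFit      bool    // outcome of the canHandleRequest check
}

// pickWindow mirrors the new Step 2 logic: keep the least-utilized worker
// that can fit the task, and stop early once utilization stops improving,
// since the candidate list starts out sorted by utilization.
func pickWindow(acceptable []window) int {
    selected := -1
    best := math.MaxFloat64 // smaller = better

    for i, wnd := range acceptable {
        if !wnd.canFit {
            continue
        }
        if wnd.utilization >= best {
            // No improvement over the current best; later candidates are
            // unlikely to be better either, so bail out of the scan.
            break
        }
        selected = i
        best = wnd.utilization
    }
    return selected
}

func main() {
    wnds := []window{
        {worker: 1, utilization: 0.20, canFit: false},
        {worker: 2, utilization: 0.35, canFit: true},
        {worker: 3, utilization: 0.90, canFit: true},
    }
    fmt.Println("selected window:", pickWindow(wnds)) // -> 1 (worker 2)
}

The early break is only a heuristic: acceptableWindows is sorted by utilization when the pass starts, but assignments made during the pass (the workerUtil[bestWid] += ... update) can perturb that order, which is what the ASCII chart in the in-code comment illustrates.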


@@ -1,8 +1,11 @@
 package sectorstorage
 
 import (
+    "context"
     "sync"
+    "time"
 
+    "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks"
     "github.com/filecoin-project/lotus/extern/sector-storage/storiface"
 )
@@ -30,13 +33,18 @@ func (a *activeResources) hasWorkWaiting() bool {
     return a.waiting > 0
 }
 
-func (a *activeResources) add(wr storiface.WorkerResources, r storiface.Resources) {
+// add task resources to activeResources and return utilization difference
+func (a *activeResources) add(wr storiface.WorkerResources, r storiface.Resources) float64 {
+    startUtil := a.utilization(wr)
+
     if r.GPUUtilization > 0 {
         a.gpuUsed += r.GPUUtilization
     }
     a.cpuUse += r.Threads(wr.CPUs, len(wr.GPUs))
     a.memUsedMin += r.MinMemory
     a.memUsedMax += r.MaxMemory
+
+    return a.utilization(wr) - startUtil
 }
 
 func (a *activeResources) free(wr storiface.WorkerResources, r storiface.Resources) {
@@ -101,6 +109,7 @@ func (a *activeResources) canHandleRequest(needRes storiface.Resources, wid stor
     return true
 }
 
+// utilization returns a number in 0..1 range indicating fraction of used resources
 func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
     var max float64
@@ -126,6 +135,13 @@ func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
         max = memMax
     }
 
+    if len(wr.GPUs) > 0 {
+        gpuMax := a.gpuUsed / float64(len(wr.GPUs))
+        if gpuMax > max {
+            max = gpuMax
+        }
+    }
+
     return max
 }
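
With the hunk above, utilization reports the busiest resource dimension as a 0..1 fraction, and GPUs are now counted alongside CPU and memory; the len(wr.GPUs) > 0 guard keeps GPU-less workers from dividing by zero. A rough standalone illustration of the same max-of-fractions idea, with parameter names that are simplified stand-ins for the activeResources/WorkerResources fields:

// busiestFraction mirrors the shape of activeResources.utilization: overall
// utilization is the most-loaded resource, expressed as a 0..1 fraction.
func busiestFraction(cpuUse, cpus, memUsed, memTotal, gpuUsed float64, gpus int) float64 {
    max := cpuUse / cpus

    if memTotal > 0 {
        if mem := memUsed / memTotal; mem > max {
            max = mem
        }
    }
    if gpus > 0 {
        // Only meaningful when the worker actually has GPUs.
        if gpu := gpuUsed / float64(gpus); gpu > max {
            max = gpu
        }
    }
    return max
}
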
@@ -142,3 +158,20 @@ func (wh *workerHandle) utilization() float64 {
 
     return u
 }
+
+var tasksCacheTimeout = 30 * time.Second
+
+func (wh *workerHandle) TaskTypes(ctx context.Context) (t map[sealtasks.TaskType]struct{}, err error) {
+    wh.tasksLk.Lock()
+    defer wh.tasksLk.Unlock()
+
+    if wh.tasksCache == nil || time.Now().Sub(wh.tasksUpdate) > tasksCacheTimeout {
+        wh.tasksCache, err = wh.workerRpc.TaskTypes(ctx)
+        if err != nil {
+            return nil, err
+        }
+        wh.tasksUpdate = time.Now()
+    }
+
+    return wh.tasksCache, nil
+}
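
The new workerHandle.TaskTypes wrapper above memoizes the worker's supported task set for tasksCacheTimeout (30 seconds) under a mutex, so the selectors changed below no longer pay a worker RPC round-trip on every scheduling decision. The same pattern in isolation, as a small TTL cache; this helper is illustrative only and not part of the Lotus codebase:

package main

import (
    "context"
    "fmt"
    "sync"
    "time"
)

// ttlCache memoizes the result of fetch for ttl; concurrent callers are
// serialized by the mutex, mirroring workerHandle.TaskTypes above.
type ttlCache[T any] struct {
    mu      sync.Mutex
    ttl     time.Duration
    fetch   func(context.Context) (T, error)
    value   T
    valid   bool
    updated time.Time
}

func (c *ttlCache[T]) get(ctx context.Context) (T, error) {
    c.mu.Lock()
    defer c.mu.Unlock()

    if !c.valid || time.Since(c.updated) > c.ttl {
        v, err := c.fetch(ctx)
        if err != nil {
            var zero T
            return zero, err // on error, nothing is cached
        }
        c.value, c.valid, c.updated = v, true, time.Now()
    }
    return c.value, nil
}

func main() {
    calls := 0
    c := &ttlCache[string]{
        ttl: 30 * time.Second,
        fetch: func(ctx context.Context) (string, error) {
            calls++
            return "expensive RPC result", nil
        },
    }

    v, _ := c.get(context.Background())
    v, _ = c.get(context.Background()) // served from cache; fetch not called again
    fmt.Println(v, "fetch calls:", calls) // -> expensive RPC result fetch calls: 1
}

The trade-off is staleness: a worker whose supported task set changes can be misjudged by selectors for up to 30 seconds, in exchange for far fewer TaskTypes RPCs.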


@@ -27,7 +27,7 @@ func newAllocSelector(index stores.SectorIndex, alloc storiface.SectorFileType,
 }
 
 func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) {
-    tasks, err := whnd.workerRpc.TaskTypes(ctx)
+    tasks, err := whnd.TaskTypes(ctx)
     if err != nil {
         return false, xerrors.Errorf("getting supported worker task types: %w", err)
     }


@@ -29,7 +29,7 @@ func newExistingSelector(index stores.SectorIndex, sector abi.SectorID, alloc st
 }
 
 func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) {
-    tasks, err := whnd.workerRpc.TaskTypes(ctx)
+    tasks, err := whnd.TaskTypes(ctx)
     if err != nil {
         return false, xerrors.Errorf("getting supported worker task types: %w", err)
     }


@@ -20,7 +20,7 @@ func newTaskSelector() *taskSelector {
 }
 
 func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) {
-    tasks, err := whnd.workerRpc.TaskTypes(ctx)
+    tasks, err := whnd.TaskTypes(ctx)
     if err != nil {
         return false, xerrors.Errorf("getting supported worker task types: %w", err)
     }
@@ -30,11 +30,12 @@ func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.
 }
 
 func (s *taskSelector) Cmp(ctx context.Context, _ sealtasks.TaskType, a, b *workerHandle) (bool, error) {
-    atasks, err := a.workerRpc.TaskTypes(ctx)
+    atasks, err := a.TaskTypes(ctx)
     if err != nil {
         return false, xerrors.Errorf("getting supported worker task types: %w", err)
     }
-    btasks, err := b.workerRpc.TaskTypes(ctx)
+
+    btasks, err := b.TaskTypes(ctx)
     if err != nil {
         return false, xerrors.Errorf("getting supported worker task types: %w", err)
     }