2020-03-23 11:40:02 +00:00
package sectorstorage
import (
2020-05-07 23:38:05 +00:00
"container/heap"
2020-04-27 18:37:31 +00:00
"context"
2020-07-09 10:58:52 +00:00
"math/rand"
2020-04-27 18:37:31 +00:00
"sort"
"sync"
2020-06-23 09:42:47 +00:00
"time"
2020-04-27 18:37:31 +00:00
2020-03-23 11:40:02 +00:00
"golang.org/x/xerrors"
2020-03-27 20:08:06 +00:00
"github.com/filecoin-project/specs-actors/actors/abi"
2020-03-27 23:21:36 +00:00
"github.com/filecoin-project/sector-storage/sealtasks"
2020-04-23 22:16:21 +00:00
"github.com/filecoin-project/sector-storage/storiface"
2020-03-23 11:40:02 +00:00
)
2020-06-24 21:06:56 +00:00
// schedPrioCtxKey is the unexported type of the context key used to carry a
// scheduling priority, so the key cannot collide with keys from other packages.
type schedPrioCtxKey int

var (
	// SchedPriorityKey is the context key under which WithPriority stores the
	// priority and from which getPriority reads it.
	SchedPriorityKey schedPrioCtxKey

	// DefaultSchedPriority is the priority getPriority reports when the
	// context carries no int priority.
	DefaultSchedPriority = 0
)
2020-07-09 10:58:52 +00:00
var (
	// SelectorTimeout bounds each WorkerSelector.Cmp call made by trySched
	// while ordering candidate windows.
	SelectorTimeout = 5 * time.Second

	// SchedWindows is not referenced in this part of the file.
	// NOTE(review): presumably the number of scheduling windows each worker
	// keeps requested - confirm against the worker loop.
	SchedWindows = 2
)
2020-06-24 21:06:56 +00:00
// getPriority returns the scheduling priority stored in ctx under
// SchedPriorityKey. If no value is stored, or the stored value is not an int,
// DefaultSchedPriority is returned.
func getPriority(ctx context.Context) int {
	prio, isInt := ctx.Value(SchedPriorityKey).(int)
	if !isInt {
		return DefaultSchedPriority
	}
	return prio
}
// WithPriority returns a child of ctx carrying the given scheduling priority
// under SchedPriorityKey. Schedule reads it back through getPriority; larger
// values are more important (see workerRequest.priority).
func WithPriority(ctx context.Context, priority int) context.Context {
	prioCtx := context.WithValue(ctx, SchedPriorityKey, priority)
	return prioCtx
}
2020-03-23 11:40:02 +00:00
// mib is the number of bytes in one mebibyte; it is used to print memory
// amounts in canHandleRequest's debug logs.
const mib = 1024 * 1024
2020-04-27 18:37:31 +00:00
// WorkerAction is a unit of work executed against a specific Worker. A
// workerRequest carries two of them: prepare and work.
type WorkerAction func(ctx context.Context, w Worker) error
type WorkerSelector interface {
2020-06-15 12:32:17 +00:00
Ok ( ctx context . Context , task sealtasks . TaskType , spt abi . RegisteredSealProof , a * workerHandle ) ( bool , error ) // true if worker is acceptable for performing a task
2020-04-27 18:37:31 +00:00
Cmp ( ctx context . Context , task sealtasks . TaskType , a , b * workerHandle ) ( bool , error ) // true if a is preferred over b
}
type scheduler struct {
2020-06-15 12:32:17 +00:00
spt abi . RegisteredSealProof
2020-04-27 18:37:31 +00:00
workersLk sync . Mutex
nextWorker WorkerID
workers map [ WorkerID ] * workerHandle
newWorkers chan * workerHandle
2020-05-01 18:00:17 +00:00
watchClosing chan WorkerID
workerClosing chan WorkerID
2020-07-09 10:58:52 +00:00
schedule chan * workerRequest
windowRequests chan * schedWindowRequest
// owned by the sh.runSched goroutine
schedQueue * requestQueue
openWindows [ ] * schedWindowRequest
closing chan struct { }
}
// workerHandle is the scheduler's record of one registered worker.
type workerHandle struct {
	// w is the worker itself; it is closed by dropWorker and schedClose.
	w Worker

	// info holds the worker's description; info.Resources is what the
	// scheduler checks task requirements against.
	info storiface.WorkerInfo

	// preparing and active are not used in the visible part of the file.
	// NOTE(review): presumably resources held by tasks in their prepare and
	// work phases respectively - confirm.
	preparing *activeResources
	active    *activeResources
}
// schedWindowRequest is a worker's request for a scheduling window. runSched
// appends such requests to scheduler.openWindows.
type schedWindowRequest struct {
	// worker is the ID of the requesting worker.
	worker WorkerID

	// done receives the filled window. trySched sends on it without
	// blocking, so it is expected to be buffered.
	done chan *schedWindow
}
// schedWindow is a batch of tasks that trySched assigned to one window
// request.
type schedWindow struct {
	// worker identifies the worker the window belongs to.
	worker WorkerID

	// allocated tracks the resources claimed by the tasks in todo.
	allocated *activeResources

	// todo lists the tasks assigned to this window.
	todo []*workerRequest
}
// activeResources is a running total of the resources claimed on a worker
// (or within a window). It is updated by add and free.
type activeResources struct {
	// memUsedMin and memUsedMax are the sums of MinMemory and MaxMemory of
	// the accounted tasks.
	memUsedMin uint64
	memUsedMax uint64

	// gpuUsed is set while a GPU-capable task is accounted for.
	gpuUsed bool

	// cpuUse is the number of CPU threads claimed.
	cpuUse uint64

	// cond is created lazily by withResources and broadcast whenever
	// resources are freed there.
	cond *sync.Cond
}
// workerRequest is one task submitted through Schedule and queued in
// scheduler.schedQueue until trySched assigns it to a window.
type workerRequest struct {
	sector   abi.SectorID
	taskType sealtasks.TaskType
	priority int // larger values more important

	// sel decides which workers may run the task and which are preferred.
	sel WorkerSelector

	// prepare and work are the actions supplied by the Schedule caller.
	prepare WorkerAction
	work    WorkerAction

	index int // The index of the item in the heap.

	// ret is where respond delivers the outcome; ctx is the caller's
	// context, used to give up on a response after cancellation.
	ret chan<- workerResponse
	ctx context.Context
}
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
// workerResponse is the result of a scheduled task as reported back to the
// Schedule caller.
type workerResponse struct {
	// err is the error Schedule returns to its caller; nil means success.
	err error
}
2020-06-15 12:32:17 +00:00
func newScheduler ( spt abi . RegisteredSealProof ) * scheduler {
2020-04-27 18:37:31 +00:00
return & scheduler {
spt : spt ,
nextWorker : 0 ,
workers : map [ WorkerID ] * workerHandle { } ,
2020-05-01 18:04:21 +00:00
newWorkers : make ( chan * workerHandle ) ,
2020-05-01 18:00:17 +00:00
watchClosing : make ( chan WorkerID ) ,
workerClosing : make ( chan WorkerID ) ,
2020-07-09 10:58:52 +00:00
schedule : make ( chan * workerRequest ) ,
closing : make ( chan struct { } ) ,
2020-04-27 18:37:31 +00:00
2020-05-07 23:38:05 +00:00
schedQueue : & requestQueue { } ,
2020-04-27 18:37:31 +00:00
}
}
2020-05-13 23:56:21 +00:00
func ( sh * scheduler ) Schedule ( ctx context . Context , sector abi . SectorID , taskType sealtasks . TaskType , sel WorkerSelector , prepare WorkerAction , work WorkerAction ) error {
2020-04-27 18:37:31 +00:00
ret := make ( chan workerResponse )
select {
case sh . schedule <- & workerRequest {
2020-05-13 23:56:21 +00:00
sector : sector ,
2020-04-27 18:37:31 +00:00
taskType : taskType ,
2020-06-24 21:06:56 +00:00
priority : getPriority ( ctx ) ,
2020-04-27 18:37:31 +00:00
sel : sel ,
prepare : prepare ,
work : work ,
ret : ret ,
ctx : ctx ,
} :
case <- sh . closing :
return xerrors . New ( "closing" )
case <- ctx . Done ( ) :
return ctx . Err ( )
}
select {
case resp := <- ret :
return resp . err
case <- sh . closing :
return xerrors . New ( "closing" )
case <- ctx . Done ( ) :
return ctx . Err ( )
}
}
func ( r * workerRequest ) respond ( err error ) {
2020-03-23 11:40:02 +00:00
select {
2020-04-27 18:37:31 +00:00
case r . ret <- workerResponse { err : err } :
case <- r . ctx . Done ( ) :
2020-03-23 11:40:02 +00:00
log . Warnf ( "request got cancelled before we could respond" )
}
}
2020-04-27 18:37:31 +00:00
func ( sh * scheduler ) runSched ( ) {
2020-05-01 18:00:17 +00:00
go sh . runWorkerWatcher ( )
2020-03-23 11:40:02 +00:00
for {
select {
2020-04-27 18:37:31 +00:00
case w := <- sh . newWorkers :
2020-07-09 10:58:52 +00:00
sh . newWorker ( w )
2020-05-01 18:00:17 +00:00
case wid := <- sh . workerClosing :
2020-07-09 10:58:52 +00:00
sh . dropWorker ( wid )
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
case req := <- sh . schedule :
2020-05-07 23:38:05 +00:00
heap . Push ( sh . schedQueue , req )
2020-07-09 10:58:52 +00:00
sh . trySched ( )
case req := <- sh . windowRequests :
sh . openWindows = append ( sh . openWindows , req )
sh . trySched ( )
2020-04-27 18:37:31 +00:00
case <- sh . closing :
sh . schedClose ( )
2020-03-24 23:49:45 +00:00
return
2020-03-23 11:40:02 +00:00
}
}
}
2020-07-09 10:58:52 +00:00
func ( sh * scheduler ) trySched ( ) {
/ *
This assigns tasks to workers based on :
- Task priority ( achieved by handling sh . schedQueue in order , since it ' s already sorted by priority )
- Worker resource availability
- Task - specified worker preference ( acceptableWindows array below sorted by this preference )
- Window request age
2020-05-01 18:00:17 +00:00
2020-07-09 10:58:52 +00:00
1. For each task in the schedQueue find windows which can handle them
1.1 . Create list of windows capable of handling a task
1.2 . Sort windows according to task selector preferences
2. Going through schedQueue again , assign task to first acceptable window
with resources available
3. Submit windows with scheduled tasks to workers
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
* /
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
windows := make ( [ ] schedWindow , len ( sh . openWindows ) )
acceptableWindows := make ( [ ] [ ] int , sh . schedQueue . Len ( ) )
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
// Step 1
for sqi := 0 ; sqi < sh . schedQueue . Len ( ) ; sqi ++ {
task := ( * sh . schedQueue ) [ sqi ]
needRes := ResourceTable [ task . taskType ] [ sh . spt ]
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
for wnd , windowRequest := range sh . openWindows {
worker := sh . workers [ windowRequest . worker ]
2020-06-23 22:35:34 +00:00
2020-07-09 10:58:52 +00:00
// TODO: allow bigger windows
if ! windows [ wnd ] . allocated . canHandleRequest ( needRes , windowRequest . worker , worker . info . Resources ) {
continue
}
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
ok , err := task . sel . Ok ( task . ctx , task . taskType , sh . spt , worker )
if err != nil {
log . Errorf ( "trySched(1) req.sel.Ok error: %+v" , err )
continue
}
2020-04-29 14:04:05 +00:00
2020-07-09 10:58:52 +00:00
if ! ok {
continue
}
2020-06-23 09:42:47 +00:00
2020-07-09 10:58:52 +00:00
acceptableWindows [ sqi ] = append ( acceptableWindows [ sqi ] , wnd )
2020-04-27 18:37:31 +00:00
}
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
if len ( acceptableWindows [ sqi ] ) == 0 {
2020-04-27 18:37:31 +00:00
continue
2020-03-23 11:40:02 +00:00
}
2020-07-09 10:58:52 +00:00
// Pick best worker (shuffle in case some workers are equally as good)
rand . Shuffle ( len ( acceptableWindows [ sqi ] ) , func ( i , j int ) {
acceptableWindows [ sqi ] [ i ] , acceptableWindows [ sqi ] [ j ] = acceptableWindows [ sqi ] [ j ] , acceptableWindows [ sqi ] [ i ]
} )
sort . SliceStable ( acceptableWindows , func ( i , j int ) bool {
wii := sh . openWindows [ acceptableWindows [ sqi ] [ i ] ] . worker
wji := sh . openWindows [ acceptableWindows [ sqi ] [ j ] ] . worker
if wii == wji {
// for the same worker prefer older windows
return acceptableWindows [ sqi ] [ i ] < acceptableWindows [ sqi ] [ j ]
}
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
wi := sh . workers [ wii ]
wj := sh . workers [ wji ]
rpcCtx , cancel := context . WithTimeout ( task . ctx , SelectorTimeout )
defer cancel ( )
r , err := task . sel . Cmp ( rpcCtx , task . taskType , wi , wj )
if err != nil {
log . Error ( "selecting best worker: %s" , err )
}
return r
} )
2020-04-27 18:37:31 +00:00
}
2020-07-09 10:58:52 +00:00
// Step 2
scheduled := 0
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
for sqi := 0 ; sqi < sh . schedQueue . Len ( ) ; sqi ++ {
task := ( * sh . schedQueue ) [ sqi ]
needRes := ResourceTable [ task . taskType ] [ sh . spt ]
2020-06-23 22:35:34 +00:00
2020-07-09 10:58:52 +00:00
selectedWindow := - 1
for _ , wnd := range acceptableWindows [ sqi + scheduled ] {
wid := sh . openWindows [ wnd ] . worker
wr := sh . workers [ wid ] . info . Resources
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
// TODO: allow bigger windows
if windows [ wnd ] . allocated . canHandleRequest ( needRes , wid , wr ) {
continue
2020-04-27 18:37:31 +00:00
}
2020-07-09 10:58:52 +00:00
windows [ wnd ] . allocated . add ( wr , needRes )
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
selectedWindow = wnd
break
}
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
windows [ selectedWindow ] . todo = append ( windows [ selectedWindow ] . todo , task )
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
heap . Remove ( sh . schedQueue , sqi )
sqi --
scheduled ++
}
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
// Step 3
2020-03-23 11:40:02 +00:00
2020-07-09 10:58:52 +00:00
if scheduled == 0 {
return
}
2020-04-27 20:59:17 +00:00
2020-07-09 10:58:52 +00:00
scheduledWindows := map [ int ] struct { } { }
for wnd , window := range windows {
if len ( window . todo ) == 0 {
// Nothing scheduled here, keep the window open
continue
}
2020-04-27 20:59:17 +00:00
2020-07-09 10:58:52 +00:00
scheduledWindows [ wnd ] = struct { } { }
2020-04-27 20:59:17 +00:00
2020-07-09 10:58:52 +00:00
select {
case sh . openWindows [ wnd ] . done <- & window :
default :
log . Error ( "expected sh.openWindows[wnd].done to be buffered" )
2020-04-27 20:59:17 +00:00
}
2020-07-09 10:58:52 +00:00
}
2020-04-27 20:59:17 +00:00
2020-07-09 10:58:52 +00:00
// Rewrite sh.openWindows array, removing scheduled windows
newOpenWindows := make ( [ ] * schedWindowRequest , 0 , len ( sh . openWindows ) - len ( scheduledWindows ) )
for wnd , window := range sh . openWindows {
if _ , scheduled := scheduledWindows [ wnd ] ; ! scheduled {
// keep unscheduled windows open
continue
}
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
newOpenWindows = append ( newOpenWindows , window )
}
2020-04-27 18:37:31 +00:00
2020-07-09 10:58:52 +00:00
sh . openWindows = newOpenWindows
}
2020-04-27 20:43:42 +00:00
2020-07-09 10:58:52 +00:00
func ( sh * scheduler ) runWorker ( wid WorkerID ) {
w := sh . workers [ wid ]
2020-04-27 20:43:42 +00:00
2020-07-09 10:58:52 +00:00
go func ( ) {
for {
2020-04-28 10:31:08 +00:00
}
2020-04-27 18:37:31 +00:00
} ( )
2020-03-23 11:40:02 +00:00
}
2020-06-30 17:26:56 +00:00
func ( a * activeResources ) withResources ( id WorkerID , wr storiface . WorkerResources , r Resources , locker sync . Locker , cb func ( ) error ) error {
2020-07-09 10:58:52 +00:00
for ! a . canHandleRequest ( r , id , wr ) {
2020-04-27 20:43:42 +00:00
if a . cond == nil {
a . cond = sync . NewCond ( locker )
}
a . cond . Wait ( )
}
a . add ( wr , r )
err := cb ( )
a . free ( wr , r )
if a . cond != nil {
a . cond . Broadcast ( )
2020-03-23 11:40:02 +00:00
}
2020-04-27 20:43:42 +00:00
return err
}
// add accounts the resources r of a newly started task against a. wr
// describes the worker; a multi-threaded task claims all of the worker's
// CPUs, any other task claims r.Threads.
func (a *activeResources) add(wr storiface.WorkerResources, r Resources) {
	// Only ever set the flag here. Assigning r.CanGPU directly would clear it
	// when a non-GPU task is added while a GPU task is still running. This
	// mirrors free, which only touches the flag for GPU-capable tasks.
	if r.CanGPU {
		a.gpuUsed = true
	}

	if r.MultiThread() {
		a.cpuUse += wr.CPUs
	} else {
		a.cpuUse += uint64(r.Threads)
	}

	a.memUsedMin += r.MinMemory
	a.memUsedMax += r.MaxMemory
}
// free releases the resources r that were previously accounted with add. wr
// must describe the same worker that was passed to add, since a
// multi-threaded task gives back all of the worker's CPUs.
func (a *activeResources) free(wr storiface.WorkerResources, r Resources) {
	// Threads to give back: the whole worker for multi-threaded tasks.
	threads := uint64(r.Threads)
	if r.MultiThread() {
		threads = wr.CPUs
	}
	a.cpuUse -= threads

	a.memUsedMax -= r.MaxMemory
	a.memUsedMin -= r.MinMemory

	if r.CanGPU {
		a.gpuUsed = false
	}
}
2020-07-09 10:58:52 +00:00
func ( a * activeResources ) canHandleRequest ( needRes Resources , wid WorkerID , res storiface . WorkerResources ) bool {
2020-03-23 11:40:02 +00:00
// TODO: dedupe needRes.BaseMinMemory per task type (don't add if that task is already running)
2020-07-09 10:58:52 +00:00
minNeedMem := res . MemReserved + a . memUsedMin + needRes . MinMemory + needRes . BaseMinMemory
2020-03-23 11:40:02 +00:00
if minNeedMem > res . MemPhysical {
log . Debugf ( "sched: not scheduling on worker %d; not enough physical memory - need: %dM, have %dM" , wid , minNeedMem / mib , res . MemPhysical / mib )
2020-04-27 20:43:42 +00:00
return false
2020-03-23 11:40:02 +00:00
}
2020-07-09 10:58:52 +00:00
maxNeedMem := res . MemReserved + a . memUsedMax + needRes . MaxMemory + needRes . BaseMinMemory
2020-06-30 17:26:56 +00:00
2020-03-23 11:40:02 +00:00
if maxNeedMem > res . MemSwap + res . MemPhysical {
log . Debugf ( "sched: not scheduling on worker %d; not enough virtual memory - need: %dM, have %dM" , wid , maxNeedMem / mib , ( res . MemSwap + res . MemPhysical ) / mib )
2020-04-27 20:43:42 +00:00
return false
2020-03-23 11:40:02 +00:00
}
2020-04-27 18:37:31 +00:00
if needRes . MultiThread ( ) {
2020-07-09 10:58:52 +00:00
if a . cpuUse > 0 {
log . Debugf ( "sched: not scheduling on worker %d; multicore process needs %d threads, %d in use, target %d" , wid , res . CPUs , a . cpuUse , res . CPUs )
2020-04-27 20:43:42 +00:00
return false
2020-03-23 11:40:02 +00:00
}
2020-04-29 14:56:20 +00:00
} else {
2020-07-09 10:58:52 +00:00
if a . cpuUse + uint64 ( needRes . Threads ) > res . CPUs {
log . Debugf ( "sched: not scheduling on worker %d; not enough threads, need %d, %d in use, target %d" , wid , needRes . Threads , a . cpuUse , res . CPUs )
2020-04-29 14:56:20 +00:00
return false
}
2020-03-23 11:40:02 +00:00
}
if len ( res . GPUs ) > 0 && needRes . CanGPU {
2020-07-09 10:58:52 +00:00
if a . gpuUsed {
2020-03-23 11:40:02 +00:00
log . Debugf ( "sched: not scheduling on worker %d; GPU in use" , wid )
2020-04-27 20:43:42 +00:00
return false
2020-03-23 11:40:02 +00:00
}
}
2020-04-27 20:43:42 +00:00
return true
2020-03-23 11:40:02 +00:00
}
2020-04-29 14:04:05 +00:00
func ( a * activeResources ) utilization ( wr storiface . WorkerResources ) float64 {
var max float64
cpu := float64 ( a . cpuUse ) / float64 ( wr . CPUs )
max = cpu
2020-05-01 18:04:21 +00:00
memMin := float64 ( a . memUsedMin + wr . MemReserved ) / float64 ( wr . MemPhysical )
2020-04-29 14:04:05 +00:00
if memMin > max {
max = memMin
}
2020-05-01 18:04:21 +00:00
memMax := float64 ( a . memUsedMax + wr . MemReserved ) / float64 ( wr . MemPhysical + wr . MemSwap )
2020-04-29 14:04:05 +00:00
if memMax > max {
max = memMax
}
return max
}
2020-07-09 10:58:52 +00:00
func ( sh * scheduler ) newWorker ( w * workerHandle ) {
2020-04-27 18:37:31 +00:00
sh . workersLk . Lock ( )
2020-03-23 11:40:02 +00:00
2020-04-27 18:37:31 +00:00
id := sh . nextWorker
sh . workers [ id ] = w
sh . nextWorker ++
2020-05-01 15:29:27 +00:00
2020-05-01 18:00:17 +00:00
sh . workersLk . Unlock ( )
select {
case sh . watchClosing <- id :
case <- sh . closing :
return
}
2020-07-09 10:58:52 +00:00
sh . runWorker ( id )
2020-05-01 18:00:17 +00:00
}
2020-07-09 10:58:52 +00:00
func ( sh * scheduler ) dropWorker ( wid WorkerID ) {
2020-05-01 18:00:17 +00:00
sh . workersLk . Lock ( )
defer sh . workersLk . Unlock ( )
w := sh . workers [ wid ]
delete ( sh . workers , wid )
go func ( ) {
if err := w . w . Close ( ) ; err != nil {
log . Warnf ( "closing worker %d: %+v" , err )
}
} ( )
2020-03-23 11:40:02 +00:00
}
2020-03-24 23:49:45 +00:00
2020-04-27 18:37:31 +00:00
func ( sh * scheduler ) schedClose ( ) {
sh . workersLk . Lock ( )
defer sh . workersLk . Unlock ( )
2020-03-24 23:49:45 +00:00
2020-04-27 18:37:31 +00:00
for i , w := range sh . workers {
2020-03-24 23:49:45 +00:00
if err := w . w . Close ( ) ; err != nil {
log . Errorf ( "closing worker %d: %+v" , i , err )
}
}
}
2020-04-27 18:37:31 +00:00
// Close signals shutdown by closing sh.closing, which makes runSched close
// all workers and return and unblocks pending Schedule calls. It always
// returns nil. Calling it a second time panics, as closing a closed channel
// does.
func (sh *scheduler) Close() error {
	close(sh.closing)

	return nil
}