package sectorstorage import ( "context" "fmt" "io" "runtime" "sort" "sync" "testing" "time" "github.com/google/uuid" "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log/v2" "github.com/stretchr/testify/require" "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/specs-storage/storage" ) func init() { InitWait = 10 * time.Millisecond } func TestWithPriority(t *testing.T) { ctx := context.Background() require.Equal(t, DefaultSchedPriority, getPriority(ctx)) ctx = WithPriority(ctx, 2222) require.Equal(t, 2222, getPriority(ctx)) } type schedTestWorker struct { name string taskTypes map[sealtasks.TaskType]struct{} paths []stores.StoragePath closed bool session uuid.UUID } func (s *schedTestWorker) SealPreCommit1(ctx context.Context, sector storage.SectorRef, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) SealPreCommit2(ctx context.Context, sector storage.SectorRef, pc1o storage.PreCommit1Out) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) SealCommit1(ctx context.Context, sector storage.SectorRef, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) SealCommit2(ctx context.Context, sector storage.SectorRef, c1o storage.Commit1Out) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) FinalizeSector(ctx context.Context, sector storage.SectorRef, keepUnsealed []storage.Range) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) ReleaseUnsealed(ctx context.Context, sector storage.SectorRef, safeToFree []storage.Range) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) Remove(ctx context.Context, sector storage.SectorRef) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) NewSector(ctx context.Context, sector storage.SectorRef) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) AddPiece(ctx context.Context, sector storage.SectorRef, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) MoveStorage(ctx context.Context, sector storage.SectorRef, types storiface.SectorFileType) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) Fetch(ctx context.Context, id storage.SectorRef, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) UnsealPiece(ctx context.Context, id storage.SectorRef, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) ReadPiece(ctx context.Context, writer io.Writer, id storage.SectorRef, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { panic("implement me") } func (s *schedTestWorker) TaskTypes(ctx context.Context) (map[sealtasks.TaskType]struct{}, error) { return s.taskTypes, nil } func (s *schedTestWorker) Paths(ctx context.Context) ([]stores.StoragePath, error) { return s.paths, nil } var decentWorkerResources = storiface.WorkerResources{ MemPhysical: 128 << 30, MemSwap: 200 << 30, MemReserved: 2 << 30, CPUs: 32, GPUs: []string{"a GPU"}, } func (s *schedTestWorker) Info(ctx context.Context) (storiface.WorkerInfo, error) { return storiface.WorkerInfo{ Hostname: s.name, Resources: decentWorkerResources, }, nil } func (s *schedTestWorker) Session(context.Context) (uuid.UUID, error) { return s.session, nil } func (s *schedTestWorker) Close() error { if !s.closed { log.Info("close schedTestWorker") s.closed = true s.session = uuid.UUID{} } return nil } var _ Worker = &schedTestWorker{} func addTestWorker(t *testing.T, sched *scheduler, index *stores.Index, name string, taskTypes map[sealtasks.TaskType]struct{}) { w := &schedTestWorker{ name: name, taskTypes: taskTypes, paths: []stores.StoragePath{{ID: "bb-8", Weight: 2, LocalPath: "<octopus>food</octopus>", CanSeal: true, CanStore: true}}, session: uuid.New(), } for _, path := range w.paths { err := index.StorageAttach(context.TODO(), stores.StorageInfo{ ID: path.ID, URLs: nil, Weight: path.Weight, CanSeal: path.CanSeal, CanStore: path.CanStore, }, fsutil.FsStat{ Capacity: 1 << 40, Available: 1 << 40, Reserved: 3, }) require.NoError(t, err) } require.NoError(t, sched.runWorker(context.TODO(), w)) } func TestSchedStartStop(t *testing.T) { sched := newScheduler() go sched.runSched() addTestWorker(t, sched, stores.NewIndex(), "fred", nil) require.NoError(t, sched.Close(context.TODO())) } func TestSched(t *testing.T) { ctx, done := context.WithTimeout(context.Background(), 30*time.Second) defer done() spt := abi.RegisteredSealProof_StackedDrg32GiBV1 type workerSpec struct { name string taskTypes map[sealtasks.TaskType]struct{} } noopAction := func(ctx context.Context, w Worker) error { return nil } type runMeta struct { done map[string]chan struct{} wg sync.WaitGroup } type task func(*testing.T, *scheduler, *stores.Index, *runMeta) sched := func(taskName, expectWorker string, sid abi.SectorNumber, taskType sealtasks.TaskType) task { _, _, l, _ := runtime.Caller(1) _, _, l2, _ := runtime.Caller(2) return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { done := make(chan struct{}) rm.done[taskName] = done sel := newAllocSelector(index, storiface.FTCache, storiface.PathSealing) rm.wg.Add(1) go func() { defer rm.wg.Done() sectorRef := storage.SectorRef{ ID: abi.SectorID{ Miner: 8, Number: sid, }, ProofType: spt, } err := sched.Schedule(ctx, sectorRef, taskType, sel, func(ctx context.Context, w Worker) error { wi, err := w.Info(ctx) require.NoError(t, err) require.Equal(t, expectWorker, wi.Hostname) log.Info("IN ", taskName) for { _, ok := <-done if !ok { break } } log.Info("OUT ", taskName) return nil }, noopAction) require.NoError(t, err, fmt.Sprint(l, l2)) }() <-sched.testSync } } taskStarted := func(name string) task { _, _, l, _ := runtime.Caller(1) _, _, l2, _ := runtime.Caller(2) return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { select { case rm.done[name] <- struct{}{}: case <-ctx.Done(): t.Fatal("ctx error", ctx.Err(), l, l2) } } } taskDone := func(name string) task { _, _, l, _ := runtime.Caller(1) _, _, l2, _ := runtime.Caller(2) return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { select { case rm.done[name] <- struct{}{}: case <-ctx.Done(): t.Fatal("ctx error", ctx.Err(), l, l2) } close(rm.done[name]) } } taskNotScheduled := func(name string) task { _, _, l, _ := runtime.Caller(1) _, _, l2, _ := runtime.Caller(2) return func(t *testing.T, sched *scheduler, index *stores.Index, rm *runMeta) { select { case rm.done[name] <- struct{}{}: t.Fatal("not expected", l, l2) case <-time.After(10 * time.Millisecond): // TODO: better synchronization thingy } } } testFunc := func(workers []workerSpec, tasks []task) func(t *testing.T) { ParallelNum = 1 ParallelDenom = 1 return func(t *testing.T) { index := stores.NewIndex() sched := newScheduler() sched.testSync = make(chan struct{}) go sched.runSched() for _, worker := range workers { addTestWorker(t, sched, index, worker.name, worker.taskTypes) } rm := runMeta{ done: map[string]chan struct{}{}, } for i, task := range tasks { log.Info("TASK", i) task(t, sched, index, &rm) } log.Info("wait for async stuff") rm.wg.Wait() require.NoError(t, sched.Close(context.TODO())) } } multTask := func(tasks ...task) task { return func(t *testing.T, s *scheduler, index *stores.Index, meta *runMeta) { for _, tsk := range tasks { tsk(t, s, index, meta) } } } t.Run("one-pc1", testFunc([]workerSpec{ {name: "fred", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}}}, }, []task{ sched("pc1-1", "fred", 8, sealtasks.TTPreCommit1), taskDone("pc1-1"), })) t.Run("pc1-2workers-1", testFunc([]workerSpec{ {name: "fred2", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit2: {}}}, {name: "fred1", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}}}, }, []task{ sched("pc1-1", "fred1", 8, sealtasks.TTPreCommit1), taskDone("pc1-1"), })) t.Run("pc1-2workers-2", testFunc([]workerSpec{ {name: "fred1", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}}}, {name: "fred2", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit2: {}}}, }, []task{ sched("pc1-1", "fred1", 8, sealtasks.TTPreCommit1), taskDone("pc1-1"), })) t.Run("pc1-block-pc2", testFunc([]workerSpec{ {name: "fred", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}, sealtasks.TTPreCommit2: {}}}, }, []task{ sched("pc1", "fred", 8, sealtasks.TTPreCommit1), taskStarted("pc1"), sched("pc2", "fred", 8, sealtasks.TTPreCommit2), taskNotScheduled("pc2"), taskDone("pc1"), taskDone("pc2"), })) t.Run("pc2-block-pc1", testFunc([]workerSpec{ {name: "fred", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}, sealtasks.TTPreCommit2: {}}}, }, []task{ sched("pc2", "fred", 8, sealtasks.TTPreCommit2), taskStarted("pc2"), sched("pc1", "fred", 8, sealtasks.TTPreCommit1), taskNotScheduled("pc1"), taskDone("pc2"), taskDone("pc1"), })) t.Run("pc1-batching", testFunc([]workerSpec{ {name: "fred", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}}}, }, []task{ sched("t1", "fred", 8, sealtasks.TTPreCommit1), taskStarted("t1"), sched("t2", "fred", 8, sealtasks.TTPreCommit1), taskStarted("t2"), // with worker settings, we can only run 2 parallel PC1s // start 2 more to fill fetch buffer sched("t3", "fred", 8, sealtasks.TTPreCommit1), taskNotScheduled("t3"), sched("t4", "fred", 8, sealtasks.TTPreCommit1), taskNotScheduled("t4"), taskDone("t1"), taskDone("t2"), taskStarted("t3"), taskStarted("t4"), taskDone("t3"), taskDone("t4"), })) twoPC1 := func(prefix string, sid abi.SectorNumber, schedAssert func(name string) task) task { return multTask( sched(prefix+"-a", "fred", sid, sealtasks.TTPreCommit1), schedAssert(prefix+"-a"), sched(prefix+"-b", "fred", sid+1, sealtasks.TTPreCommit1), schedAssert(prefix+"-b"), ) } twoPC1Act := func(prefix string, schedAssert func(name string) task) task { return multTask( schedAssert(prefix+"-a"), schedAssert(prefix+"-b"), ) } diag := func() task { return func(t *testing.T, s *scheduler, index *stores.Index, meta *runMeta) { time.Sleep(20 * time.Millisecond) for _, request := range s.diag().Requests { log.Infof("!!! sDIAG: sid(%d) task(%s)", request.Sector.Number, request.TaskType) } wj := (&Manager{sched: s}).WorkerJobs() type line struct { storiface.WorkerJob wid uuid.UUID } lines := make([]line, 0) for wid, jobs := range wj { for _, job := range jobs { lines = append(lines, line{ WorkerJob: job, wid: wid, }) } } // oldest first sort.Slice(lines, func(i, j int) bool { if lines[i].RunWait != lines[j].RunWait { return lines[i].RunWait < lines[j].RunWait } return lines[i].Start.Before(lines[j].Start) }) for _, l := range lines { log.Infof("!!! wDIAG: rw(%d) sid(%d) t(%s)", l.RunWait, l.Sector.Number, l.Task) } } } // run this one a bunch of times, it had a very annoying tendency to fail randomly for i := 0; i < 40; i++ { t.Run("pc1-pc2-prio", testFunc([]workerSpec{ {name: "fred", taskTypes: map[sealtasks.TaskType]struct{}{sealtasks.TTPreCommit1: {}, sealtasks.TTPreCommit2: {}}}, }, []task{ // fill queues twoPC1("w0", 0, taskStarted), twoPC1("w1", 2, taskNotScheduled), sched("w2", "fred", 4, sealtasks.TTPreCommit1), taskNotScheduled("w2"), // windowed sched("t1", "fred", 8, sealtasks.TTPreCommit1), taskNotScheduled("t1"), sched("t2", "fred", 9, sealtasks.TTPreCommit1), taskNotScheduled("t2"), sched("t3", "fred", 10, sealtasks.TTPreCommit2), taskNotScheduled("t3"), diag(), twoPC1Act("w0", taskDone), twoPC1Act("w1", taskStarted), taskNotScheduled("w2"), twoPC1Act("w1", taskDone), taskStarted("w2"), taskDone("w2"), diag(), taskStarted("t3"), taskNotScheduled("t1"), taskNotScheduled("t2"), taskDone("t3"), taskStarted("t1"), taskStarted("t2"), taskDone("t1"), taskDone("t2"), })) } } type slowishSelector bool func (s slowishSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *workerHandle) (bool, error) { time.Sleep(200 * time.Microsecond) return bool(s), nil } func (s slowishSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *workerHandle) (bool, error) { time.Sleep(100 * time.Microsecond) return true, nil } var _ WorkerSelector = slowishSelector(true) func BenchmarkTrySched(b *testing.B) { logging.SetAllLoggers(logging.LevelInfo) defer logging.SetAllLoggers(logging.LevelDebug) ctx := context.Background() test := func(windows, queue int) func(b *testing.B) { return func(b *testing.B) { for i := 0; i < b.N; i++ { b.StopTimer() sched := newScheduler() sched.workers[WorkerID{}] = &workerHandle{ workerRpc: nil, info: storiface.WorkerInfo{ Hostname: "t", Resources: decentWorkerResources, }, preparing: &activeResources{}, active: &activeResources{}, } for i := 0; i < windows; i++ { sched.openWindows = append(sched.openWindows, &schedWindowRequest{ worker: WorkerID{}, done: make(chan *schedWindow, 1000), }) } for i := 0; i < queue; i++ { sched.schedQueue.Push(&workerRequest{ taskType: sealtasks.TTCommit2, sel: slowishSelector(true), ctx: ctx, }) } b.StartTimer() sched.trySched() } } } b.Run("1w-1q", test(1, 1)) b.Run("500w-1q", test(500, 1)) b.Run("1w-500q", test(1, 500)) b.Run("200w-400q", test(200, 400)) } func TestWindowCompact(t *testing.T) { sh := scheduler{} spt := abi.RegisteredSealProof_StackedDrg32GiBV1 test := func(start [][]sealtasks.TaskType, expect [][]sealtasks.TaskType) func(t *testing.T) { return func(t *testing.T) { wh := &workerHandle{ info: storiface.WorkerInfo{ Resources: decentWorkerResources, }, } for _, windowTasks := range start { window := &schedWindow{} for _, task := range windowTasks { window.todo = append(window.todo, &workerRequest{ taskType: task, sector: storage.SectorRef{ProofType: spt}, }) window.allocated.add(wh.info.Resources, ResourceTable[task][spt]) } wh.activeWindows = append(wh.activeWindows, window) } sw := schedWorker{ sched: &sh, worker: wh, } sw.workerCompactWindows() require.Equal(t, len(start)-len(expect), -sw.windowsRequested) for wi, tasks := range expect { var expectRes activeResources for ti, task := range tasks { require.Equal(t, task, wh.activeWindows[wi].todo[ti].taskType, "%d, %d", wi, ti) expectRes.add(wh.info.Resources, ResourceTable[task][spt]) } require.Equal(t, expectRes.cpuUse, wh.activeWindows[wi].allocated.cpuUse, "%d", wi) require.Equal(t, expectRes.gpuUsed, wh.activeWindows[wi].allocated.gpuUsed, "%d", wi) require.Equal(t, expectRes.memUsedMin, wh.activeWindows[wi].allocated.memUsedMin, "%d", wi) require.Equal(t, expectRes.memUsedMax, wh.activeWindows[wi].allocated.memUsedMax, "%d", wi) } } } t.Run("2-pc1-windows", test( [][]sealtasks.TaskType{{sealtasks.TTPreCommit1}, {sealtasks.TTPreCommit1}}, [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1}}), ) t.Run("1-window", test( [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1}}, [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1}}), ) t.Run("2-pc2-windows", test( [][]sealtasks.TaskType{{sealtasks.TTPreCommit2}, {sealtasks.TTPreCommit2}}, [][]sealtasks.TaskType{{sealtasks.TTPreCommit2}, {sealtasks.TTPreCommit2}}), ) t.Run("2pc1-pc1ap", test( [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1}, {sealtasks.TTPreCommit1, sealtasks.TTAddPiece}}, [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1, sealtasks.TTAddPiece}, {sealtasks.TTPreCommit1}}), ) t.Run("2pc1-pc1appc2", test( [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1}, {sealtasks.TTPreCommit1, sealtasks.TTAddPiece, sealtasks.TTPreCommit2}}, [][]sealtasks.TaskType{{sealtasks.TTPreCommit1, sealtasks.TTPreCommit1, sealtasks.TTAddPiece}, {sealtasks.TTPreCommit1, sealtasks.TTPreCommit2}}), ) }