diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml index ab9924c02..1184724b5 100644 --- a/documentation/en/default-lotus-miner-config.toml +++ b/documentation/en/default-lotus-miner-config.toml @@ -542,6 +542,13 @@ # env var: LOTUS_STORAGE_ASSIGNER #Assigner = "utilization" + # If you see stuck Finalize tasks after enabling this setting, check + # 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]' + # + # type: bool + # env var: LOTUS_STORAGE_DISALLOWREMOTEFINALIZE + #DisallowRemoteFinalize = false + # ResourceFiltering instructs the system which resource filtering strategy # to use when evaluating tasks against this worker. An empty value defaults # to "hardware". diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index fc8589bbf..1abe04fa1 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -71,7 +71,8 @@ type Manager struct { workLk sync.Mutex work *statestore.StateStore - parallelCheckLimit int + parallelCheckLimit int + disallowRemoteFinalize bool callToWork map[storiface.CallID]WorkID // used when we get an early return and there's no callToWork mapping @@ -123,6 +124,8 @@ type Config struct { // PoSt config ParallelCheckLimit int + DisallowRemoteFinalize bool + Assigner string } @@ -155,7 +158,8 @@ func New(ctx context.Context, lstor *stores.Local, stor stores.Store, ls stores. localProver: prover, - parallelCheckLimit: sc.ParallelCheckLimit, + parallelCheckLimit: sc.ParallelCheckLimit, + disallowRemoteFinalize: sc.DisallowRemoteFinalize, work: mss, callToWork: map[storiface.CallID]WorkID{}, @@ -634,7 +638,7 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, } // get a selector for moving stuff into long-term storage - fetchSel := newMoveSelector(m.index, sector.ID, storiface.FTCache|storiface.FTSealed, storiface.PathStorage) + fetchSel := newMoveSelector(m.index, sector.ID, storiface.FTCache|storiface.FTSealed, storiface.PathStorage, !m.disallowRemoteFinalize) // only move the unsealed file if it still exists and needs moving moveUnsealed := unsealed @@ -712,7 +716,7 @@ func (m *Manager) FinalizeReplicaUpdate(ctx context.Context, sector storage.Sect move := func(types storiface.SectorFileType) error { // get a selector for moving stuff into long-term storage - fetchSel := newMoveSelector(m.index, sector.ID, types, storiface.PathStorage) + fetchSel := newMoveSelector(m.index, sector.ID, types, storiface.PathStorage, !m.disallowRemoteFinalize) { if len(keepUnsealed) == 0 { moveUnsealed = storiface.FTNone diff --git a/extern/sector-storage/selector_move.go b/extern/sector-storage/selector_move.go index 1fb4c9457..7c63691b1 100644 --- a/extern/sector-storage/selector_move.go +++ b/extern/sector-storage/selector_move.go @@ -13,18 +13,20 @@ import ( ) type moveSelector struct { - index stores.SectorIndex - sector abi.SectorID - alloc storiface.SectorFileType - destPtype storiface.PathType + index stores.SectorIndex + sector abi.SectorID + alloc storiface.SectorFileType + destPtype storiface.PathType + allowRemote bool } -func newMoveSelector(index stores.SectorIndex, sector abi.SectorID, alloc storiface.SectorFileType, destPtype storiface.PathType) *moveSelector { +func newMoveSelector(index stores.SectorIndex, sector abi.SectorID, alloc storiface.SectorFileType, destPtype storiface.PathType, allowRemote bool) *moveSelector { return &moveSelector{ - index: index, - sector: sector, - alloc: alloc, - destPtype: destPtype, + index: index, + sector: sector, + alloc: alloc, + destPtype: destPtype, + allowRemote: allowRemote, } } @@ -86,7 +88,7 @@ func (s *moveSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi. } } - return ok, false, nil + return ok && s.allowRemote, false, nil } func (s *moveSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index a9c7ed8f2..ceacaca42 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -763,6 +763,13 @@ This parameter is ONLY applicable if the retrieval pricing policy strategy has b Comment: `Assigner specifies the worker assigner to use when scheduling tasks. "utilization" (default) - assign tasks to workers with lowest utilization. "spread" - assign tasks to as many distinct workers as possible.`, + }, + { + Name: "DisallowRemoteFinalize", + Type: "bool", + + Comment: `If you see stuck Finalize tasks after enabling this setting, check +'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]'`, }, { Name: "ResourceFiltering", diff --git a/node/config/storage.go b/node/config/storage.go index 6ab35984a..de65dc60a 100644 --- a/node/config/storage.go +++ b/node/config/storage.go @@ -63,6 +63,7 @@ func (c *StorageMiner) StorageManager() sectorstorage.Config { AllowProveReplicaUpdate2: c.Storage.AllowProveReplicaUpdate2, AllowRegenSectorKey: c.Storage.AllowRegenSectorKey, ResourceFiltering: c.Storage.ResourceFiltering, + DisallowRemoteFinalize: c.Storage.DisallowRemoteFinalize, Assigner: c.Storage.Assigner, diff --git a/node/config/types.go b/node/config/types.go index 0e5bcbd5f..c79f70876 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -335,6 +335,19 @@ type SealerConfig struct { // "spread" - assign tasks to as many distinct workers as possible. Assigner string + // DisallowRemoteFinalize when set to true will force all Finalize tasks to + // run on workers with local access to both long-term storage and the sealing + // path containing the sector. + // + // WARNING: Only set this if all workers have access to long-term storage + // paths. If this flag is enabled, and there are workers without long-term + // storage access, sectors will not be moved from them, and Finalize tasks + // will appear to be stuck. + // + // If you see stuck Finalize tasks after enabling this setting, check + // 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]' + DisallowRemoteFinalize bool + // ResourceFiltering instructs the system which resource filtering strategy // to use when evaluating tasks against this worker. An empty value defaults // to "hardware".