Merge pull request #9116 from filecoin-project/feat/worker-name-set
feat: sealing: Allow overriding worker hostname
commit ab0592231c
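Note: in short, the name a worker reports (previously always os.Hostname()) is now overridable, either per external worker via the new --name flag / LOTUS_WORKER_NAME env var, or for the miner's builtin worker via the new LocalWorkerName config field. Example invocations (the name "sealer-01" is a made-up value):

    LOTUS_WORKER_NAME=sealer-01 lotus-worker run
    # equivalently:
    lotus-worker run --name sealer-01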
@@ -159,6 +159,12 @@ var runCmd = &cli.Command{
 			Usage: "don't use swap",
 			Value: false,
 		},
+		&cli.StringFlag{
+			Name:        "name",
+			Usage:       "custom worker name",
+			EnvVars:     []string{"LOTUS_WORKER_NAME"},
+			DefaultText: "hostname",
+		},
 		&cli.BoolFlag{
 			Name:  "addpiece",
 			Usage: "enable addpiece",
@@ -513,6 +519,7 @@ var runCmd = &cli.Command{
 				NoSwap:                    cctx.Bool("no-swap"),
 				MaxParallelChallengeReads: cctx.Int("post-parallel-reads"),
 				ChallengeReadTimeout:      cctx.Duration("post-read-timeout"),
+				Name:                      cctx.String("name"),
 			}, remote, localStore, nodeApi, nodeApi, wsts),
 			LocalStore: localStore,
 			Storage:    lr,
@@ -41,6 +41,7 @@ OPTIONS:
    --addpiece                enable addpiece (default: true)
    --commit                  enable commit (32G sectors: all cores or GPUs, 128GiB Memory + 64GiB swap) (default: true)
    --listen value            host address and port the worker api will listen on (default: "0.0.0.0:3456")
+   --name value              custom worker name (default: hostname) [$LOTUS_WORKER_NAME]
    --no-default              disable all default compute tasks, use the worker for storage/fetching only (default: false)
    --no-local-storage        don't use storageminer repo for sector storage (default: false)
    --no-swap                 don't use swap (default: false)
@@ -622,6 +622,13 @@
   # env var: LOTUS_STORAGE_ALLOWREGENSECTORKEY
   #AllowRegenSectorKey = true

+  # LocalWorkerName specifies a custom name for the builtin worker.
+  # If set to an empty string (default), the OS hostname will be used.
+  #
+  # type: string
+  # env var: LOTUS_STORAGE_LOCALWORKERNAME
+  #LocalWorkerName = ""
+
   # Assigner specifies the worker assigner to use when scheduling tasks.
   # "utilization" (default) - assign tasks to workers with lowest utilization.
   # "spread" - assign tasks to as many distinct workers as possible.
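Note: to name the builtin worker, uncomment the new key in the miner config; judging by c.Storage.LocalWorkerName in the hunks below, it lives in the [Storage] section. The value is illustrative:

    [Storage]
      LocalWorkerName = "miner-builtin"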
@@ -727,6 +727,7 @@ func (n *Ensemble) Start() *Ensemble {
 		LocalWorker: sectorstorage.NewLocalWorker(sectorstorage.WorkerConfig{
 			TaskTypes: m.options.workerTasks,
 			NoSwap:    false,
+			Name:      m.options.workerName,
 		}, store, localStore, m.MinerNode, m.MinerNode, wsts),
 		LocalStore: localStore,
 		Storage:    lr,
@@ -47,6 +47,7 @@ type nodeOpts struct {

 	workerTasks      []sealtasks.TaskType
 	workerStorageOpt func(paths.Store) paths.Store
+	workerName       string
 }

 // DefaultNodeOpts are the default options that will be applied to test nodes.
@@ -219,6 +220,13 @@ func WithTaskTypes(tt []sealtasks.TaskType) NodeOpt {
 	}
 }

+func WithWorkerName(n string) NodeOpt {
+	return func(opts *nodeOpts) error {
+		opts.workerName = n
+		return nil
+	}
+}
+
 var WithSealWorkerTasks = WithTaskTypes([]sealtasks.TaskType{sealtasks.TTFetch, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTPreCommit2, sealtasks.TTCommit2, sealtasks.TTUnseal})

 func WithWorkerStorage(transform func(paths.Store) paths.Store) NodeOpt {
@@ -401,3 +401,28 @@ func TestWindowPostWorkerManualPoSt(t *testing.T) {
 	require.NoError(t, err)
 	require.Len(t, lastPending, 0)
 }
+
+func TestWorkerName(t *testing.T) {
+	name := "thisstringisprobablynotahostnameihope"
+
+	ctx := context.Background()
+	_, miner, worker, ens := kit.EnsembleWorker(t, kit.WithAllSubsystems(), kit.ThroughRPC(), kit.WithWorkerName(name))
+
+	ens.InterconnectAll().BeginMining(50 * time.Millisecond)
+
+	e, err := worker.Info(ctx)
+	require.NoError(t, err)
+	require.Equal(t, name, e.Hostname)
+
+	ws, err := miner.WorkerStats(ctx)
+	require.NoError(t, err)
+
+	var found bool
+	for _, stats := range ws {
+		if stats.Info.Hostname == name {
+			found = true
+		}
+	}
+
+	require.True(t, found)
+}
@@ -844,6 +844,13 @@ This parameter is ONLY applicable if the retrieval pricing policy strategy has b

 		Comment: ``,
 	},
+	{
+		Name: "LocalWorkerName",
+		Type: "string",
+
+		Comment: `LocalWorkerName specifies a custom name for the builtin worker.
+If set to an empty string (default), the OS hostname will be used`,
+	},
 	{
 		Name: "Assigner",
 		Type: "string",
@@ -65,6 +65,8 @@ func (c *StorageMiner) StorageManager() sealer.Config {
 		ResourceFiltering:      c.Storage.ResourceFiltering,
 		DisallowRemoteFinalize: c.Storage.DisallowRemoteFinalize,

+		LocalWorkerName: c.Storage.LocalWorkerName,
+
 		Assigner: c.Storage.Assigner,

 		ParallelCheckLimit: c.Proving.ParallelCheckLimit,
@@ -401,6 +401,10 @@ type SealerConfig struct {
 	AllowProveReplicaUpdate2 bool
 	AllowRegenSectorKey      bool

+	// LocalWorkerName specifies a custom name for the builtin worker.
+	// If set to an empty string (default), the OS hostname will be used.
+	LocalWorkerName string
+
 	// Assigner specifies the worker assigner to use when scheduling tasks.
 	// "utilization" (default) - assign tasks to workers with lowest utilization.
 	// "spread" - assign tasks to as many distinct workers as possible.
@@ -116,6 +116,8 @@ type Config struct {
 	AllowProveReplicaUpdate2 bool
 	AllowRegenSectorKey      bool

+	LocalWorkerName string
+
 	// ResourceFiltering instructs the system which resource filtering strategy
 	// to use when evaluating tasks against this worker. An empty value defaults
 	// to "hardware".
@@ -207,6 +209,7 @@ func New(ctx context.Context, lstor *paths.Local, stor paths.Store, ls paths.Loc
 	wcfg := WorkerConfig{
 		IgnoreResourceFiltering: sc.ResourceFiltering == ResourceFilteringDisabled,
 		TaskTypes:               localTasks,
+		Name:                    sc.LocalWorkerName,
 	}
 	worker := NewLocalWorker(wcfg, stor, lstor, si, m, wss)
 	err = m.AddWorker(ctx, worker)
@@ -34,6 +34,9 @@ type WorkerConfig struct {
 	TaskTypes []sealtasks.TaskType
 	NoSwap    bool

+	// os.Hostname if not set
+	Name string
+
 	// IgnoreResourceFiltering enables task distribution to happen on this
 	// worker regardless of its currently available resources. Used in testing
 	// with the local worker.
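Note: a minimal sketch of constructing a named local worker directly, mirroring the ensemble change above (store, localStore, minerNode, wsts and localTasks are assumed to exist as in that hunk; the name value is hypothetical):

    w := sectorstorage.NewLocalWorker(sectorstorage.WorkerConfig{
    	TaskTypes: localTasks,
    	Name:      "sealer-01", // hypothetical; "" falls back to os.Hostname()
    }, store, localStore, minerNode, minerNode, wsts)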
@@ -56,6 +59,8 @@ type LocalWorker struct {
 	noSwap    bool
 	envLookup EnvFunc

+	name string
+
 	// see equivalent field on WorkerConfig.
 	ignoreResources bool

@@ -83,6 +88,7 @@ func newLocalWorker(executor ExecutorFunc, wcfg WorkerConfig, envLookup EnvFunc,
 		localStore: local,
 		sindex:     sindex,
 		ret:        ret,
+		name:       wcfg.Name,

 		ct: &workerCallTracker{
 			st: cst,
@@ -97,6 +103,14 @@ func newLocalWorker(executor ExecutorFunc, wcfg WorkerConfig, envLookup EnvFunc,
 		closing: make(chan struct{}),
 	}

+	if w.name == "" {
+		var err error
+		w.name, err = os.Hostname()
+		if err != nil {
+			panic(err)
+		}
+	}
+
 	if wcfg.MaxParallelChallengeReads > 0 {
 		w.challengeThrottle = make(chan struct{}, wcfg.MaxParallelChallengeReads)
 	}
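Note: taken together, the resolution order is: explicit name (from --name / LOTUS_WORKER_NAME, or LocalWorkerName for the builtin worker), else os.Hostname(), panicking if even that fails. As a standalone Go sketch (not lotus code):

    func effectiveWorkerName(configured string) string {
    	if configured != "" { // --name / LOTUS_WORKER_NAME / LocalWorkerName
    		return configured
    	}
    	hostname, err := os.Hostname() // fallback, as in newLocalWorker above
    	if err != nil {
    		panic(err) // newLocalWorker panics here too
    	}
    	return hostname
    }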
@@ -113,13 +127,7 @@ func newLocalWorker(executor ExecutorFunc, wcfg WorkerConfig, envLookup EnvFunc,

 	go func() {
 		for _, call := range unfinished {
-			hostname, osErr := os.Hostname()
-			if osErr != nil {
-				log.Errorf("get hostname err: %+v", err)
-				hostname = ""
-			}
-
-			err := storiface.Err(storiface.ErrTempWorkerRestart, xerrors.Errorf("worker [Hostname: %s] restarted", hostname))
+			err := storiface.Err(storiface.ErrTempWorkerRestart, xerrors.Errorf("worker [name: %s] restarted", w.name))

 			// TODO: Handle restarting PC1 once support is merged

@@ -283,12 +291,7 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector storiface.SectorRef,
 		}

 		if err != nil {
-			hostname, osErr := os.Hostname()
-			if osErr != nil {
-				log.Errorf("get hostname err: %+v", err)
-			}
-
-			err = xerrors.Errorf("%w [Hostname: %s]", err, hostname)
+			err = xerrors.Errorf("%w [name: %s]", err, l.name)
 		}

 		if doReturn(ctx, rt, ci, l.ret, res, toCallError(err)) {
@@ -774,11 +777,6 @@ func (l *LocalWorker) memInfo() (memPhysical, memUsed, memSwap, memSwapUsed uint
 }

 func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
-	hostname, err := os.Hostname() // TODO: allow overriding from config
-	if err != nil {
-		panic(err)
-	}
-
 	gpus, err := ffi.GetGPUDevices()
 	if err != nil {
 		log.Errorf("getting gpu devices failed: %+v", err)
@@ -797,7 +795,7 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
 	}

 	return storiface.WorkerInfo{
-		Hostname:        hostname,
+		Hostname:        l.name,
 		IgnoreResources: l.ignoreResources,
 		Resources: storiface.WorkerResources{
 			MemPhysical: memPhysical,
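Note: the override surfaces anywhere WorkerInfo does; a sketch of reading it back over the API (w is an assumed connected worker API client, as in TestWorkerName above):

    info, err := w.Info(ctx)
    if err != nil {
    	return err
    }
    // info.Hostname now carries the configured name (l.name), not necessarily
    // the machine's real hostname.
    fmt.Println("worker name:", info.Hostname)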