lotus/cmd/lotus-worker/main.go

882 lines
24 KiB
Go
Raw Normal View History

package main
2019-11-21 00:52:59 +00:00
import (
2020-03-13 01:37:38 +00:00
"context"
2020-03-16 18:46:02 +00:00
"encoding/json"
"fmt"
2020-03-16 17:50:07 +00:00
"net"
2020-03-13 01:37:38 +00:00
"net/http"
2019-11-21 00:52:59 +00:00
"os"
"os/signal"
2020-03-16 18:46:02 +00:00
"path/filepath"
"reflect"
"strings"
"time"
2020-03-16 18:46:02 +00:00
"github.com/google/uuid"
2020-09-14 07:44:55 +00:00
"github.com/ipfs/go-datastore/namespace"
logging "github.com/ipfs/go-log/v2"
"github.com/multiformats/go-multiaddr"
2020-08-30 18:28:58 +00:00
manet "github.com/multiformats/go-multiaddr/net"
"github.com/urfave/cli/v2"
2020-10-21 08:37:50 +00:00
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
2020-06-05 22:59:01 +00:00
"golang.org/x/xerrors"
2019-11-21 00:52:59 +00:00
2020-05-20 18:23:51 +00:00
"github.com/filecoin-project/go-jsonrpc/auth"
"github.com/filecoin-project/go-paramfetch"
2020-09-14 07:44:55 +00:00
"github.com/filecoin-project/go-statestore"
2020-03-23 11:40:02 +00:00
2020-03-11 01:57:52 +00:00
"github.com/filecoin-project/lotus/api"
2019-11-21 00:52:59 +00:00
"github.com/filecoin-project/lotus/build"
2020-03-11 01:57:52 +00:00
lcli "github.com/filecoin-project/lotus/cli"
cliutil "github.com/filecoin-project/lotus/cli/util"
"github.com/filecoin-project/lotus/cmd/lotus-worker/sealworker"
2020-01-08 13:49:34 +00:00
"github.com/filecoin-project/lotus/lib/lotuslog"
"github.com/filecoin-project/lotus/lib/ulimit"
2020-10-21 08:37:50 +00:00
"github.com/filecoin-project/lotus/metrics"
2020-09-14 07:44:55 +00:00
"github.com/filecoin-project/lotus/node/modules"
"github.com/filecoin-project/lotus/node/repo"
"github.com/filecoin-project/lotus/storage/paths"
"github.com/filecoin-project/lotus/storage/sealer"
2023-08-18 15:13:57 +00:00
"github.com/filecoin-project/lotus/storage/sealer/ffiwrapper"
"github.com/filecoin-project/lotus/storage/sealer/sealtasks"
"github.com/filecoin-project/lotus/storage/sealer/storiface"
2019-11-21 00:52:59 +00:00
)
var log = logging.Logger("main")

// FlagWorkerRepo is the CLI flag / env var name pointing at the worker repo path.
const FlagWorkerRepo = "worker-repo"

// FlagWorkerRepoDeprecation is the old spelling of the repo flag, kept as an alias.
// TODO remove after deprecation period
const FlagWorkerRepoDeprecation = "workerrepo"
2019-11-21 00:52:59 +00:00
func main() {
api.RunningNodeType = api.NodeWorker
2020-01-08 13:49:34 +00:00
lotuslog.SetupLogLevels()
2019-11-21 00:52:59 +00:00
local := []*cli.Command{
runCmd,
2022-07-29 22:33:30 +00:00
stopCmd,
2020-08-30 18:28:58 +00:00
infoCmd,
storageCmd,
setCmd,
waitQuietCmd,
resourcesCmd,
tasksCmd,
2019-11-21 00:52:59 +00:00
}
app := &cli.App{
2021-05-24 07:00:47 +00:00
Name: "lotus-worker",
Usage: "Remote miner worker",
Version: string(build.MinerUserVersion()),
2021-05-24 07:00:47 +00:00
EnableBashCompletion: true,
2019-11-21 00:52:59 +00:00
Flags: []cli.Flag{
&cli.StringFlag{
2020-07-08 10:38:59 +00:00
Name: FlagWorkerRepo,
2020-07-10 12:18:09 +00:00
Aliases: []string{FlagWorkerRepoDeprecation},
EnvVars: []string{"LOTUS_WORKER_PATH", "WORKER_PATH"},
2019-11-21 00:52:59 +00:00
Value: "~/.lotusworker", // TODO: Consider XDG_DATA_HOME
2020-07-10 12:18:09 +00:00
Usage: fmt.Sprintf("Specify worker repo path. flag %s and env WORKER_PATH are DEPRECATION, will REMOVE SOON", FlagWorkerRepoDeprecation),
2019-11-21 00:52:59 +00:00
},
2021-09-17 22:01:54 +00:00
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotusworker", // should follow --repo default
2021-09-17 22:01:54 +00:00
},
2019-11-21 00:52:59 +00:00
&cli.StringFlag{
2020-07-08 10:38:59 +00:00
Name: "miner-repo",
2020-07-10 12:18:09 +00:00
Aliases: []string{"storagerepo"},
EnvVars: []string{"LOTUS_MINER_PATH", "LOTUS_STORAGE_PATH"},
2020-07-08 10:38:59 +00:00
Value: "~/.lotusminer", // TODO: Consider XDG_DATA_HOME
2020-07-10 12:18:09 +00:00
Usage: fmt.Sprintf("Specify miner repo path. flag storagerepo and env LOTUS_STORAGE_PATH are DEPRECATION, will REMOVE SOON"),
2019-11-21 00:52:59 +00:00
},
2019-12-07 14:19:46 +00:00
&cli.BoolFlag{
Name: "enable-gpu-proving",
Usage: "enable use of GPU for mining operations",
Value: true,
EnvVars: []string{"LOTUS_WORKER_ENABLE_GPU_PROVING"},
2019-12-07 14:19:46 +00:00
},
2019-11-21 00:52:59 +00:00
},
2021-09-17 22:01:54 +00:00
After: func(c *cli.Context) error {
if r := recover(); r != nil {
2022-07-12 13:35:11 +00:00
// Generate report in LOTUS_PANIC_REPORT_PATH and re-raise panic
build.GenerateMinerPanicReport(c.String("panic-reports"), c.String(FlagWorkerRepo), c.App.Name)
2021-09-17 22:01:54 +00:00
panic(r)
}
return nil
},
2019-11-21 00:52:59 +00:00
Commands: local,
}
app.Setup()
2020-03-25 21:15:10 +00:00
app.Metadata["repoType"] = repo.Worker
2019-11-21 00:52:59 +00:00
if err := app.Run(os.Args); err != nil {
2019-11-21 18:38:43 +00:00
log.Warnf("%+v", err)
2019-11-21 00:52:59 +00:00
return
}
}
2022-07-29 22:33:30 +00:00
var stopCmd = &cli.Command{
Name: "stop",
Usage: "Stop a running lotus worker",
Flags: []cli.Flag{},
Action: func(cctx *cli.Context) error {
api, closer, err := lcli.GetWorkerAPI(cctx)
if err != nil {
return err
}
defer closer()
ctx := lcli.ReqContext(cctx)
// Detach any storage associated with this worker
2022-08-10 21:36:38 +00:00
err = api.StorageDetachAll(ctx)
if err != nil {
return err
}
2022-07-29 22:33:30 +00:00
err = api.Shutdown(ctx)
if err != nil {
return err
}
return nil
},
}
2019-11-21 00:52:59 +00:00
// runCmd starts the lotus-worker: it connects to the lotus-miner API,
// selects which sealing task types this worker advertises, opens (and, if
// needed, initializes) the worker repo, then serves the worker RPC API while
// maintaining a heartbeat/reconnect loop against the miner.
var runCmd = &cli.Command{
	Name:  "run",
	Usage: "Start lotus worker",
	Flags: []cli.Flag{
		&cli.StringFlag{
			Name:    "listen",
			Usage:   "host address and port the worker api will listen on",
			Value:   "0.0.0.0:3456",
			EnvVars: []string{"LOTUS_WORKER_LISTEN"},
		},
		// Deprecated alias for --listen; remapped in Before below.
		&cli.StringFlag{
			Name:   "address",
			Hidden: true,
		},
		&cli.BoolFlag{
			Name:    "no-local-storage",
			Usage:   "don't use storageminer repo for sector storage",
			EnvVars: []string{"LOTUS_WORKER_NO_LOCAL_STORAGE"},
		},
		&cli.BoolFlag{
			Name:    "no-swap",
			Usage:   "don't use swap",
			Value:   false,
			EnvVars: []string{"LOTUS_WORKER_NO_SWAP"},
		},
		&cli.StringFlag{
			Name:        "name",
			Usage:       "custom worker name",
			EnvVars:     []string{"LOTUS_WORKER_NAME"},
			DefaultText: "hostname",
		},
		&cli.BoolFlag{
			Name:    "addpiece",
			Usage:   "enable addpiece",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_ADDPIECE"},
		},
		&cli.BoolFlag{
			Name:    "precommit1",
			Usage:   "enable precommit1",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_PRECOMMIT1"},
		},
		&cli.BoolFlag{
			Name:    "unseal",
			Usage:   "enable unsealing",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_UNSEAL"},
		},
		&cli.BoolFlag{
			Name:    "precommit2",
			Usage:   "enable precommit2",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_PRECOMMIT2"},
		},
		&cli.BoolFlag{
			Name:    "commit",
			Usage:   "enable commit",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_COMMIT"},
		},
		&cli.BoolFlag{
			Name:    "replica-update",
			Usage:   "enable replica update",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_REPLICA_UPDATE"},
		},
		&cli.BoolFlag{
			Name:    "prove-replica-update2",
			Usage:   "enable prove replica update 2",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_PROVE_REPLICA_UPDATE2"},
		},
		&cli.BoolFlag{
			Name:    "regen-sector-key",
			Usage:   "enable regen sector key",
			Value:   true,
			EnvVars: []string{"LOTUS_WORKER_REGEN_SECTOR_KEY"},
		},
		&cli.BoolFlag{
			Name:    "sector-download",
			Usage:   "enable external sector data download",
			Value:   false,
			EnvVars: []string{"LOTUS_WORKER_SECTOR_DOWNLOAD"},
		},
		&cli.BoolFlag{
			Name:    "windowpost",
			Usage:   "enable window post",
			Value:   false,
			EnvVars: []string{"LOTUS_WORKER_WINDOWPOST"},
		},
		&cli.BoolFlag{
			Name:    "winningpost",
			Usage:   "enable winning post",
			Value:   false,
			EnvVars: []string{"LOTUS_WORKER_WINNINGPOST"},
		},
		&cli.BoolFlag{
			Name:    "no-default",
			Usage:   "disable all default compute tasks, use the worker for storage/fetching only",
			Value:   false,
			EnvVars: []string{"LOTUS_WORKER_NO_DEFAULT"},
		},
		&cli.IntFlag{
			Name:    "parallel-fetch-limit",
			Usage:   "maximum fetch operations to run in parallel",
			Value:   5,
			EnvVars: []string{"LOTUS_WORKER_PARALLEL_FETCH_LIMIT"},
		},
		&cli.IntFlag{
			Name:    "post-parallel-reads",
			Usage:   "maximum number of parallel challenge reads (0 = no limit)",
			Value:   32,
			EnvVars: []string{"LOTUS_WORKER_POST_PARALLEL_READS"},
		},
		&cli.DurationFlag{
			Name:    "post-read-timeout",
			Usage:   "time limit for reading PoSt challenges (0 = no limit)",
			Value:   0,
			EnvVars: []string{"LOTUS_WORKER_POST_READ_TIMEOUT"},
		},
		&cli.StringFlag{
			Name:    "timeout",
			Usage:   "used when 'listen' is unspecified. must be a valid duration recognized by golang's time.ParseDuration function",
			Value:   "30m",
			EnvVars: []string{"LOTUS_WORKER_TIMEOUT"},
		},
		&cli.StringFlag{
			Name:  "http-server-timeout",
			Value: "30s",
		},
		&cli.BoolFlag{
			Name:        "data-cid",
			Usage:       "Run the data-cid task. true|false",
			Value:       true,
			DefaultText: "inherits --addpiece",
		},
		&cli.StringFlag{
			Name:  "external-pc2",
			Usage: "command for computing PC2 externally",
		},
	},
	Description: `Run lotus-worker.
--external-pc2 can be used to compute the PreCommit2 inputs externally.
The flag behaves similarly to the related lotus-worker flag, using it in
lotus-bench may be useful for testing if the external PreCommit2 command is
invoked correctly.
The command will be called with a number of environment variables set:
* EXTSEAL_PC2_SECTOR_NUM: the sector number
* EXTSEAL_PC2_SECTOR_MINER: the miner id
* EXTSEAL_PC2_PROOF_TYPE: the proof type
* EXTSEAL_PC2_SECTOR_SIZE: the sector size in bytes
* EXTSEAL_PC2_CACHE: the path to the cache directory
* EXTSEAL_PC2_SEALED: the path to the sealed sector file (initialized with unsealed data by the caller)
* EXTSEAL_PC2_PC1OUT: output from rust-fil-proofs precommit1 phase (base64 encoded json)
The command is expected to:
* Create cache sc-02-data-tree-r* files
* Create cache sc-02-data-tree-c* files
* Create cache p_aux / t_aux files
* Transform the sealed file in place
Example invocation of lotus-bench as external executor:
'./lotus-bench simple precommit2 --sector-size $EXTSEAL_PC2_SECTOR_SIZE $EXTSEAL_PC2_SEALED $EXTSEAL_PC2_CACHE $EXTSEAL_PC2_PC1OUT'
`,
	// Map the deprecated --address flag onto --listen before the Action runs.
	Before: func(cctx *cli.Context) error {
		if cctx.IsSet("address") {
			log.Warnf("The '--address' flag is deprecated, it has been replaced by '--listen'")
			if err := cctx.Set("listen", cctx.String("address")); err != nil {
				return err
			}
		}
		return nil
	},
	Action: func(cctx *cli.Context) error {
		log.Info("Starting lotus worker")

		// Disabling GPU proving is done by env var, which the proofs library
		// reads; presumably bellman honors BELLMAN_NO_GPU — set before any
		// proving code runs.
		if !cctx.Bool("enable-gpu-proving") {
			if err := os.Setenv("BELLMAN_NO_GPU", "true"); err != nil {
				return xerrors.Errorf("could not set no-gpu env: %+v", err)
			}
		}

		// ensure tmpdir exists
		td := os.TempDir()
		if err := os.MkdirAll(td, 0755); err != nil {
			return xerrors.Errorf("ensuring temp dir %s exists: %w", td, err)
		}

		// Check file descriptor limit; sealing opens many files at once.
		limit, _, err := ulimit.GetLimit()
		switch {
		case err == ulimit.ErrUnsupported:
			log.Errorw("checking file descriptor limit failed", "error", err)
		case err != nil:
			return xerrors.Errorf("checking fd limit: %w", err)
		default:
			if limit < build.MinerFDLimit {
				return xerrors.Errorf("soft file descriptor limit (ulimit -n) too low, want %d, current %d", build.MinerFDLimit, limit)
			}
		}

		// Check DC-environment variable: DataCid resource restrictions are
		// sector-size independent, so remap any DC_<size>_<res> overrides onto
		// the size-independent DC_<res> form. Field names come from the
		// `envname` struct tags on storiface.Resources.
		sectorSizes := []string{"2KiB", "8MiB", "512MiB", "32GiB", "64GiB"}
		resourcesType := reflect.TypeOf(storiface.Resources{})
		for _, sectorSize := range sectorSizes {
			for i := 0; i < resourcesType.NumField(); i++ {
				field := resourcesType.Field(i)
				envName := field.Tag.Get("envname")
				if envName != "" {
					// Check if DC_[SectorSize]_[ResourceRestriction] is set
					envVar, ok := os.LookupEnv("DC_" + sectorSize + "_" + envName)
					if ok {
						// If it is set, convert it to DC_[ResourceRestriction]
						err := os.Setenv("DC_"+envName, envVar)
						if err != nil {
							log.Fatalf("Error setting environment variable: %v", err)
						}
						log.Warnf("Converted DC_%s_%s to DC_%s, because DC is a sector-size independent job", sectorSize, envName, envName)
					}
				}
			}
		}

		// Connect to storage-miner
		ctx := lcli.ReqContext(cctx)
		// Create a new context with cancel function
		ctx, cancel := context.WithCancel(ctx)
		defer cancel()

		// Listen for interrupt signals
		go func() {
			c := make(chan os.Signal, 1)
			signal.Notify(c, os.Interrupt)
			<-c
			cancel()
		}()

		// Retry forever (once a second) until the miner API answers a Version
		// call, or the user interrupts.
		var nodeApi api.StorageMiner
		var closer func()
		for {
			nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, cliutil.StorageMinerUseHttp)
			if err == nil {
				_, err = nodeApi.Version(ctx)
				if err == nil {
					break
				}
			}
			fmt.Printf("\r\x1b[0KConnecting to miner API... (%s)", err)
			select {
			case <-ctx.Done():
				return xerrors.New("Interrupted by user")
			case <-time.After(time.Second):
			}
		}

		defer closer()

		// Register all metric views
		if err := view.Register(
			metrics.DefaultViews...,
		); err != nil {
			log.Fatalf("Cannot register the view: %v", err)
		}

		v, err := nodeApi.Version(ctx)
		if err != nil {
			return err
		}
		if v.APIVersion != api.MinerAPIVersion0 {
			return xerrors.Errorf("lotus-miner API version doesn't match: expected: %s", api.APIVersion{APIVersion: api.MinerAPIVersion0})
		}
		log.Infof("Remote version %s", v)

		// Check params: the actor's sector size decides which proof
		// parameters would need to be fetched below.
		act, err := nodeApi.ActorAddress(ctx)
		if err != nil {
			return err
		}
		ssize, err := nodeApi.ActorSectorSize(ctx, act)
		if err != nil {
			return err
		}

		// Build the advertised task-type list from the flags. PoSt flags
		// switch the worker into a dedicated PoSt worker type; otherwise it
		// is a sealing worker (unless --no-default).
		var taskTypes []sealtasks.TaskType
		var workerType string
		var needParams bool

		if cctx.Bool("windowpost") {
			needParams = true
			workerType = sealtasks.WorkerWindowPoSt
			taskTypes = append(taskTypes, sealtasks.TTGenerateWindowPoSt)
		}
		if cctx.Bool("winningpost") {
			needParams = true
			workerType = sealtasks.WorkerWinningPoSt
			taskTypes = append(taskTypes, sealtasks.TTGenerateWinningPoSt)
		}

		if workerType == "" {
			// Baseline tasks every non-PoSt worker always offers.
			taskTypes = append(taskTypes, sealtasks.TTFetch, sealtasks.TTCommit1, sealtasks.TTProveReplicaUpdate1, sealtasks.TTFinalize, sealtasks.TTFinalizeUnsealed, sealtasks.TTFinalizeReplicaUpdate)
			if !cctx.Bool("no-default") {
				workerType = sealtasks.WorkerSealing
			}
		}

		// Each task below is enabled when this is a sealing worker with the
		// flag at its (true) default, or when the flag was set explicitly.
		ttDataCidDefault := false
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("addpiece")) && cctx.Bool("addpiece") {
			taskTypes = append(taskTypes, sealtasks.TTAddPiece)
			ttDataCidDefault = true
		}
		// data-cid inherits --addpiece unless set explicitly.
		if workerType == sealtasks.WorkerSealing {
			if cctx.IsSet("data-cid") {
				if cctx.Bool("data-cid") {
					taskTypes = append(taskTypes, sealtasks.TTDataCid)
				}
			} else if ttDataCidDefault {
				taskTypes = append(taskTypes, sealtasks.TTDataCid)
			}
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("sector-download")) && cctx.Bool("sector-download") {
			taskTypes = append(taskTypes, sealtasks.TTDownloadSector)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("precommit1")) && cctx.Bool("precommit1") {
			taskTypes = append(taskTypes, sealtasks.TTPreCommit1)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("unseal")) && cctx.Bool("unseal") {
			taskTypes = append(taskTypes, sealtasks.TTUnseal)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("precommit2")) && cctx.Bool("precommit2") {
			taskTypes = append(taskTypes, sealtasks.TTPreCommit2)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("commit")) && cctx.Bool("commit") {
			needParams = true
			taskTypes = append(taskTypes, sealtasks.TTCommit2)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("replica-update")) && cctx.Bool("replica-update") {
			taskTypes = append(taskTypes, sealtasks.TTReplicaUpdate)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("prove-replica-update2")) && cctx.Bool("prove-replica-update2") {
			needParams = true
			taskTypes = append(taskTypes, sealtasks.TTProveReplicaUpdate2)
		}
		if (workerType == sealtasks.WorkerSealing || cctx.IsSet("regen-sector-key")) && cctx.Bool("regen-sector-key") {
			taskTypes = append(taskTypes, sealtasks.TTRegenSectorKey)
		}

		// A --no-default worker with no PoSt tasks is still classified as a
		// sealing worker so the consistency check below passes.
		if cctx.Bool("no-default") && workerType == "" {
			workerType = sealtasks.WorkerSealing
		}

		if len(taskTypes) == 0 {
			return xerrors.Errorf("no task types specified")
		}
		// A single worker may only serve tasks of one worker type.
		for _, taskType := range taskTypes {
			if taskType.WorkerType() != workerType {
				return xerrors.Errorf("expected all task types to be for %s worker, but task %s is for %s worker", workerType, taskType, taskType.WorkerType())
			}
		}

		// Fetch proof parameters only when a proving task (PoSt / Commit2 /
		// ProveReplicaUpdate2) was enabled.
		if needParams {
			if err := paramfetch.GetParams(ctx, build.ParametersJSON(), build.SrsJSON(), uint64(ssize)); err != nil {
				return xerrors.Errorf("get params: %w", err)
			}
		}

		// Open repo
		repoPath := cctx.String(FlagWorkerRepo)
		r, err := repo.NewFS(repoPath)
		if err != nil {
			return err
		}

		ok, err := r.Exists()
		if err != nil {
			return err
		}
		if !ok {
			// First run: initialize the repo, optionally register the repo
			// directory itself as seal-scratch storage, and pre-create the
			// metadata datastore.
			if err := r.Init(repo.Worker); err != nil {
				return err
			}

			lr, err := r.Lock(repo.Worker)
			if err != nil {
				return err
			}

			var localPaths []storiface.LocalPath

			if !cctx.Bool("no-local-storage") {
				b, err := json.MarshalIndent(&storiface.LocalStorageMeta{
					ID:       storiface.ID(uuid.New().String()),
					Weight:   10,
					CanSeal:  true,
					CanStore: false,
				}, "", "  ")
				if err != nil {
					return xerrors.Errorf("marshaling storage config: %w", err)
				}

				if err := os.WriteFile(filepath.Join(lr.Path(), "sectorstore.json"), b, 0644); err != nil {
					return xerrors.Errorf("persisting storage metadata (%s): %w", filepath.Join(lr.Path(), "sectorstore.json"), err)
				}

				localPaths = append(localPaths, storiface.LocalPath{
					Path: lr.Path(),
				})
			}

			if err := lr.SetStorage(func(sc *storiface.StorageConfig) {
				sc.StoragePaths = append(sc.StoragePaths, localPaths...)
			}); err != nil {
				return xerrors.Errorf("set storage config: %w", err)
			}

			{
				// init datastore for r.Exists
				_, err := lr.Datastore(context.Background(), "/metadata")
				if err != nil {
					return err
				}
			}
			// Release the init-time lock; the repo is re-locked for the
			// lifetime of the process just below.
			if err := lr.Close(); err != nil {
				return xerrors.Errorf("close repo: %w", err)
			}
		}

		lr, err := r.Lock(repo.Worker)
		if err != nil {
			return err
		}
		defer func() {
			if err := lr.Close(); err != nil {
				log.Error("closing repo", err)
			}
		}()
		ds, err := lr.Datastore(context.Background(), "/metadata")
		if err != nil {
			return err
		}

		log.Info("Opening local storage; connecting to master")
		const unspecifiedAddress = "0.0.0.0"
		address := cctx.String("listen")
		host, port, err := net.SplitHostPort(address)
		if err != nil {
			return err
		}

		// When listening on 0.0.0.0, discover a concrete routable IP to
		// advertise by dialing the miner (see extractRoutableIP).
		// NOTE(review): only the IPv4 unspecified address is detected here;
		// "::" would not trigger discovery — confirm whether that's intended.
		if ip := net.ParseIP(host); ip != nil {
			if ip.String() == unspecifiedAddress {
				timeout, err := time.ParseDuration(cctx.String("timeout"))
				if err != nil {
					return err
				}
				rip, err := extractRoutableIP(timeout)
				if err != nil {
					return err
				}
				host = rip
			}
		}

		var newAddress string

		// Check if the IP address is IPv6; bracket it for host:port form.
		ip := net.ParseIP(host)
		if ip.To4() == nil && ip.To16() != nil {
			newAddress = "[" + host + "]:" + port
		} else {
			newAddress = host + ":" + port
		}

		localStore, err := paths.NewLocal(ctx, lr, nodeApi, []string{"http://" + newAddress + "/remote"})
		if err != nil {
			return err
		}

		// Setup remote sector store
		sminfo, err := lcli.GetAPIInfo(cctx, repo.StorageMiner)
		if err != nil {
			return xerrors.Errorf("could not get api info: %w", err)
		}

		remote := paths.NewRemote(localStore, nodeApi, sminfo.AuthHeader(), cctx.Int("parallel-fetch-limit"),
			&paths.DefaultPartialFileHandler{})

		// HTTP handler serving sector data to other workers/miner; requests
		// must carry an admin-permission token.
		fh := &paths.FetchHandler{Local: localStore, PfHandler: &paths.DefaultPartialFileHandler{}}
		remoteHandler := func(w http.ResponseWriter, r *http.Request) {
			if !auth.HasPerm(r.Context(), nil, api.PermAdmin) {
				w.WriteHeader(401)
				_ = json.NewEncoder(w).Encode(struct{ Error string }{"unauthorized: missing admin permission"})
				return
			}

			fh.ServeHTTP(w, r)
		}

		// Parse ffi executor flags
		var ffiOpts []ffiwrapper.FFIWrapperOpt
		if cctx.IsSet("external-pc2") {
			extSeal := ffiwrapper.ExternalSealer{
				PreCommit2: ffiwrapper.MakeExternPrecommit2(cctx.String("external-pc2")),
			}
			ffiOpts = append(ffiOpts, ffiwrapper.WithExternalSealCalls(extSeal))
		}

		// Create / expose the worker
		wsts := statestore.New(namespace.Wrap(ds, modules.WorkerCallsPrefix))

		workerApi := &sealworker.Worker{
			LocalWorker: sealer.NewLocalWorkerWithExecutor(
				sealer.FFIExec(ffiOpts...),
				sealer.WorkerConfig{
					TaskTypes:                 taskTypes,
					NoSwap:                    cctx.Bool("no-swap"),
					MaxParallelChallengeReads: cctx.Int("post-parallel-reads"),
					ChallengeReadTimeout:      cctx.Duration("post-read-timeout"),
					Name:                      cctx.String("name"),
				}, os.LookupEnv, remote, localStore, nodeApi, nodeApi, wsts),
			LocalStore: localStore,
			Storage:    lr,
		}

		log.Info("Setting up control endpoint at " + newAddress)

		timeout, err := time.ParseDuration(cctx.String("http-server-timeout"))
		if err != nil {
			return xerrors.Errorf("invalid time string %s: %x", cctx.String("http-server-timeout"), err)
		}

		srv := &http.Server{
			Handler:           sealworker.WorkerHandler(nodeApi.AuthVerify, remoteHandler, workerApi, true),
			ReadHeaderTimeout: timeout,
			BaseContext: func(listener net.Listener) context.Context {
				ctx, _ := tag.New(context.Background(), tag.Upsert(metrics.APIInterface, "lotus-worker"))
				return ctx
			},
		}

		// Shut the RPC server down when the command context is cancelled
		// (interrupt signal).
		go func() {
			<-ctx.Done()
			log.Warn("Shutting down...")
			if err := srv.Shutdown(context.TODO()); err != nil {
				log.Errorf("shutting down RPC server failed: %s", err)
			}
			log.Warn("Graceful shutdown successful")
		}()

		nl, err := net.Listen("tcp", newAddress)
		if err != nil {
			return err
		}

		// Persist our API endpoint + token in the repo so CLI sub-commands
		// (stop, info, ...) can find this worker.
		{
			a, err := net.ResolveTCPAddr("tcp", newAddress)
			if err != nil {
				return xerrors.Errorf("parsing address: %w", err)
			}

			ma, err := manet.FromNetAddr(a)
			if err != nil {
				return xerrors.Errorf("creating api multiaddress: %w", err)
			}

			if err := lr.SetAPIEndpoint(ma); err != nil {
				return xerrors.Errorf("setting api endpoint: %w", err)
			}

			ainfo, err := lcli.GetAPIInfo(cctx, repo.StorageMiner)
			if err != nil {
				return xerrors.Errorf("could not get miner API info: %w", err)
			}

			// TODO: ideally this would be a token with some permissions dropped
			if err := lr.SetAPIToken(ainfo.Token); err != nil {
				return xerrors.Errorf("setting api token: %w", err)
			}
		}

		minerSession, err := nodeApi.Session(ctx)
		if err != nil {
			return xerrors.Errorf("getting miner session: %w", err)
		}

		// waitQuietCh returns a channel closed once all in-flight local
		// tasks have drained.
		waitQuietCh := func() chan struct{} {
			out := make(chan struct{})
			go func() {
				workerApi.LocalWorker.WaitQuiet()
				close(out)
			}()
			return out
		}

		// Heartbeat / reconnect loop: register with the miner once local
		// tasks are quiet, poll the miner session every heartbeat, and on a
		// session change (miner restarted) redeclare storage and re-register.
		go func() {
			heartbeats := time.NewTicker(paths.HeartbeatInterval)
			defer heartbeats.Stop()

			var redeclareStorage bool
			var readyCh chan struct{}
			for {
				// If we're reconnecting, redeclare storage first
				if redeclareStorage {
					log.Info("Redeclaring local storage")

					if err := localStore.Redeclare(ctx, nil, false); err != nil {
						log.Errorf("Redeclaring local storage failed: %+v", err)

						select {
						case <-ctx.Done():
							return // graceful shutdown
						case <-heartbeats.C:
						}
						continue
					}
				}

				// TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly
				if readyCh == nil {
					log.Info("Making sure no local tasks are running")
					readyCh = waitQuietCh()
				}

				// Inner loop: heartbeat until the miner session changes
				// (break → reconnect) or the context ends.
				for {
					curSession, err := nodeApi.Session(ctx)
					if err != nil {
						log.Errorf("heartbeat: checking remote session failed: %+v", err)
					} else {
						if curSession != minerSession {
							minerSession = curSession
							break
						}
					}

					select {
					case <-readyCh:
						if err := nodeApi.WorkerConnect(ctx, "http://"+newAddress+"/rpc/v0"); err != nil {
							log.Errorf("Registering worker failed: %+v", err)
							cancel()
							return
						}

						log.Info("Worker registered successfully, waiting for tasks")

						readyCh = nil
					case <-heartbeats.C:
					case <-ctx.Done():
						return // graceful shutdown
					}
				}

				log.Errorf("LOTUS-MINER CONNECTION LOST")

				redeclareStorage = true
			}
		}()

		// Separate shutdown path for an API-initiated stop (workerApi.Done).
		go func() {
			<-workerApi.Done()
			// Wait 20s to allow the miner to unregister the worker on next heartbeat
			time.Sleep(20 * time.Second)
			log.Warn("Shutting down...")
			if err := srv.Shutdown(context.TODO()); err != nil {
				log.Errorf("shutting down RPC server failed: %s", err)
			}
			log.Warn("Graceful shutdown successful")
		}()

		// Blocks until the listener is closed by one of the shutdown paths.
		return srv.Serve(nl)
	},
}
// extractRoutableIP determines the local IP address that is routable to the
// miner by dialing the miner's TCP multiaddress (taken from the
// MINER_API_INFO environment variable, format "<jwt>:<multiaddr>") and
// reading the local side of the resulting connection.
//
// It is used when the worker was asked to listen on the unspecified address
// and a concrete address must be advertised to the miner instead.
func extractRoutableIP(timeout time.Duration) (string, error) {
	const minerMultiAddrKey = "MINER_API_INFO"
	const deprecatedMinerMultiAddrKey = "STORAGE_API_INFO"

	env, ok := os.LookupEnv(minerMultiAddrKey)
	if !ok {
		if _, ok := os.LookupEnv(deprecatedMinerMultiAddrKey); ok {
			log.Warnf("Using a deprecated env(%s) value, please use env(%s) instead.", deprecatedMinerMultiAddrKey, minerMultiAddrKey)
		}
		return "", xerrors.New("MINER_API_INFO environment variable required to extract IP")
	}

	// Split the JWT from the multiaddress; only the multiaddress part is used.
	_, maddrStr, found := strings.Cut(env, ":")
	if !found {
		return "", xerrors.Errorf("invalid MINER_API_INFO format")
	}

	maddr, err := multiaddr.NewMultiaddr(maddrStr)
	if err != nil {
		return "", err
	}

	// Prefer an IPv6 component, fall back to IPv4. Previously lookup errors
	// were ignored and an empty host could be dialed; fail explicitly now.
	minerIP, _ := maddr.ValueForProtocol(multiaddr.P_IP6)
	if minerIP == "" {
		minerIP, _ = maddr.ValueForProtocol(multiaddr.P_IP4)
	}
	if minerIP == "" {
		return "", xerrors.Errorf("no ip4/ip6 component in miner multiaddress %q", maddrStr)
	}
	minerPort, err := maddr.ValueForProtocol(multiaddr.P_TCP)
	if err != nil {
		return "", xerrors.Errorf("no tcp component in miner multiaddress %q: %w", maddrStr, err)
	}

	conn, err := net.DialTimeout("tcp", net.JoinHostPort(minerIP, minerPort), timeout)
	if err != nil {
		return "", err
	}
	defer func() {
		if cerr := conn.Close(); cerr != nil {
			log.Errorf("Error closing connection: %v", cerr)
		}
	}()

	// The local endpoint of the established connection is, by construction,
	// an address routable to the miner.
	localAddr := conn.LocalAddr().(*net.TCPAddr)
	return localAddr.IP.String(), nil
}