cosmos-sdk/server/start.go
2023-06-30 15:27:05 +00:00

606 lines
20 KiB
Go

package server
import (
"context"
"errors"
"fmt"
"io"
"net"
"os"
"runtime/pprof"
"github.com/cometbft/cometbft/abci/server"
cmtcmd "github.com/cometbft/cometbft/cmd/cometbft/commands"
cmtcfg "github.com/cometbft/cometbft/config"
"github.com/cometbft/cometbft/node"
"github.com/cometbft/cometbft/p2p"
pvm "github.com/cometbft/cometbft/privval"
"github.com/cometbft/cometbft/proxy"
"github.com/cometbft/cometbft/rpc/client/local"
cmttypes "github.com/cometbft/cometbft/types"
dbm "github.com/cosmos/cosmos-db"
"github.com/hashicorp/go-metrics"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"golang.org/x/sync/errgroup"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
pruningtypes "cosmossdk.io/store/pruning/types"
"github.com/cosmos/cosmos-sdk/client"
"github.com/cosmos/cosmos-sdk/client/flags"
"github.com/cosmos/cosmos-sdk/codec"
"github.com/cosmos/cosmos-sdk/server/api"
serverconfig "github.com/cosmos/cosmos-sdk/server/config"
servergrpc "github.com/cosmos/cosmos-sdk/server/grpc"
servercmtlog "github.com/cosmos/cosmos-sdk/server/log"
"github.com/cosmos/cosmos-sdk/server/types"
"github.com/cosmos/cosmos-sdk/telemetry"
"github.com/cosmos/cosmos-sdk/types/mempool"
"github.com/cosmos/cosmos-sdk/version"
genutiltypes "github.com/cosmos/cosmos-sdk/x/genutil/types"
)
const (
// CometBFT full-node start flags
flagWithComet = "with-comet"
flagAddress = "address"
flagTransport = "transport"
flagTraceStore = "trace-store"
flagCPUProfile = "cpu-profile"
FlagMinGasPrices = "minimum-gas-prices"
FlagHaltHeight = "halt-height"
FlagHaltTime = "halt-time"
FlagInterBlockCache = "inter-block-cache"
FlagUnsafeSkipUpgrades = "unsafe-skip-upgrades"
FlagTrace = "trace"
FlagInvCheckPeriod = "inv-check-period"
FlagPruning = "pruning"
FlagPruningKeepRecent = "pruning-keep-recent"
FlagPruningInterval = "pruning-interval"
FlagIndexEvents = "index-events"
FlagMinRetainBlocks = "min-retain-blocks"
FlagIAVLCacheSize = "iavl-cache-size"
FlagDisableIAVLFastNode = "iavl-disable-fastnode"
// state sync-related flags
FlagStateSyncSnapshotInterval = "state-sync.snapshot-interval"
FlagStateSyncSnapshotKeepRecent = "state-sync.snapshot-keep-recent"
// api-related flags
FlagAPIEnable = "api.enable"
FlagAPISwagger = "api.swagger"
FlagAPIAddress = "api.address"
FlagAPIMaxOpenConnections = "api.max-open-connections"
FlagRPCReadTimeout = "api.rpc-read-timeout"
FlagRPCWriteTimeout = "api.rpc-write-timeout"
FlagRPCMaxBodyBytes = "api.rpc-max-body-bytes"
FlagAPIEnableUnsafeCORS = "api.enabled-unsafe-cors"
// gRPC-related flags
flagGRPCOnly = "grpc-only"
flagGRPCEnable = "grpc.enable"
flagGRPCAddress = "grpc.address"
flagGRPCWebEnable = "grpc-web.enable"
// mempool flags
FlagMempoolMaxTxs = "mempool.max-txs"
)
// StartCmdOptions defines options that can be customized in `StartCmdWithOptions`,
type StartCmdOptions struct {
// DBOpener can be used to customize db opening, for example customize db options or support different db backends,
// default to the builtin db opener.
DBOpener func(rootDir string, backendType dbm.BackendType) (dbm.DB, error)
// PostSetup can be used to setup extra services under the same cancellable context,
// it's not called in stand-alone mode, only for in-process mode.
PostSetup func(svrCtx *Context, clientCtx client.Context, ctx context.Context, g *errgroup.Group) error
// AddFlags add custom flags to start cmd
AddFlags func(cmd *cobra.Command)
}
// StartCmd runs the service passed in, either stand-alone or in-process with
// CometBFT.
func StartCmd(appCreator types.AppCreator, defaultNodeHome string) *cobra.Command {
return StartCmdWithOptions(appCreator, defaultNodeHome, StartCmdOptions{})
}
// StartCmdWithOptions runs the service passed in, either stand-alone or in-process with
// CometBFT.
func StartCmdWithOptions(appCreator types.AppCreator, defaultNodeHome string, opts StartCmdOptions) *cobra.Command {
if opts.DBOpener == nil {
opts.DBOpener = openDB
}
cmd := &cobra.Command{
Use: "start",
Short: "Run the full node",
Long: `Run the full node application with CometBFT in or out of process. By
default, the application will run with CometBFT in process.
Pruning options can be provided via the '--pruning' flag or alternatively with '--pruning-keep-recent', and
'pruning-interval' together.
For '--pruning' the options are as follows:
default: the last 362880 states are kept, pruning at 10 block intervals
nothing: all historic states will be saved, nothing will be deleted (i.e. archiving node)
everything: 2 latest states will be kept; pruning at 10 block intervals.
custom: allow pruning options to be manually specified through 'pruning-keep-recent', and 'pruning-interval'
Node halting configurations exist in the form of two flags: '--halt-height' and '--halt-time'. During
the ABCI Commit phase, the node will check if the current block height is greater than or equal to
the halt-height or if the current block time is greater than or equal to the halt-time. If so, the
node will attempt to gracefully shutdown and the block will not be committed. In addition, the node
will not be able to commit subsequent blocks.
For profiling and benchmarking purposes, CPU profiling can be enabled via the '--cpu-profile' flag
which accepts a path for the resulting pprof file.
The node may be started in a 'query only' mode where only the gRPC and JSON HTTP
API services are enabled via the 'grpc-only' flag. In this mode, CometBFT is
bypassed and can be used when legacy queries are needed after an on-chain upgrade
is performed. Note, when enabled, gRPC will also be automatically enabled.
`,
PreRunE: func(cmd *cobra.Command, _ []string) error {
serverCtx := GetServerContextFromCmd(cmd)
// Bind flags to the Context's Viper so the app construction can set
// options accordingly.
if err := serverCtx.Viper.BindPFlags(cmd.Flags()); err != nil {
return err
}
_, err := GetPruningOptionsFromFlags(serverCtx.Viper)
return err
},
RunE: func(cmd *cobra.Command, _ []string) error {
serverCtx := GetServerContextFromCmd(cmd)
clientCtx, err := client.GetClientQueryContext(cmd)
if err != nil {
return err
}
withCMT, _ := cmd.Flags().GetBool(flagWithComet)
if !withCMT {
serverCtx.Logger.Info("starting ABCI without CometBFT")
}
return wrapCPUProfile(serverCtx, func() error {
return start(serverCtx, clientCtx, appCreator, withCMT, opts)
})
},
}
cmd.Flags().String(flags.FlagHome, defaultNodeHome, "The application home directory")
cmd.Flags().Bool(flagWithComet, true, "Run abci app embedded in-process with CometBFT")
cmd.Flags().String(flagAddress, "tcp://0.0.0.0:26658", "Listen address")
cmd.Flags().String(flagTransport, "socket", "Transport protocol: socket, grpc")
cmd.Flags().String(flagTraceStore, "", "Enable KVStore tracing to an output file")
cmd.Flags().String(FlagMinGasPrices, "", "Minimum gas prices to accept for transactions; Any fee in a tx must meet this minimum (e.g. 0.01photino;0.0001stake)")
cmd.Flags().IntSlice(FlagUnsafeSkipUpgrades, []int{}, "Skip a set of upgrade heights to continue the old binary")
cmd.Flags().Uint64(FlagHaltHeight, 0, "Block height at which to gracefully halt the chain and shutdown the node")
cmd.Flags().Uint64(FlagHaltTime, 0, "Minimum block time (in Unix seconds) at which to gracefully halt the chain and shutdown the node")
cmd.Flags().Bool(FlagInterBlockCache, true, "Enable inter-block caching")
cmd.Flags().String(flagCPUProfile, "", "Enable CPU profiling and write to the provided file")
cmd.Flags().Bool(FlagTrace, false, "Provide full stack traces for errors in ABCI Log")
cmd.Flags().String(FlagPruning, pruningtypes.PruningOptionDefault, "Pruning strategy (default|nothing|everything|custom)")
cmd.Flags().Uint64(FlagPruningKeepRecent, 0, "Number of recent heights to keep on disk (ignored if pruning is not 'custom')")
cmd.Flags().Uint64(FlagPruningInterval, 0, "Height interval at which pruned heights are removed from disk (ignored if pruning is not 'custom')")
cmd.Flags().Uint(FlagInvCheckPeriod, 0, "Assert registered invariants every N blocks")
cmd.Flags().Uint64(FlagMinRetainBlocks, 0, "Minimum block height offset during ABCI commit to prune CometBFT blocks")
cmd.Flags().Bool(FlagAPIEnable, false, "Define if the API server should be enabled")
cmd.Flags().Bool(FlagAPISwagger, false, "Define if swagger documentation should automatically be registered (Note: the API must also be enabled)")
cmd.Flags().String(FlagAPIAddress, serverconfig.DefaultAPIAddress, "the API server address to listen on")
cmd.Flags().Uint(FlagAPIMaxOpenConnections, 1000, "Define the number of maximum open connections")
cmd.Flags().Uint(FlagRPCReadTimeout, 10, "Define the CometBFT RPC read timeout (in seconds)")
cmd.Flags().Uint(FlagRPCWriteTimeout, 0, "Define the CometBFT RPC write timeout (in seconds)")
cmd.Flags().Uint(FlagRPCMaxBodyBytes, 1000000, "Define the CometBFT maximum request body (in bytes)")
cmd.Flags().Bool(FlagAPIEnableUnsafeCORS, false, "Define if CORS should be enabled (unsafe - use it at your own risk)")
cmd.Flags().Bool(flagGRPCOnly, false, "Start the node in gRPC query only mode (no CometBFT process is started)")
cmd.Flags().Bool(flagGRPCEnable, true, "Define if the gRPC server should be enabled")
cmd.Flags().String(flagGRPCAddress, serverconfig.DefaultGRPCAddress, "the gRPC server address to listen on")
cmd.Flags().Bool(flagGRPCWebEnable, true, "Define if the gRPC-Web server should be enabled. (Note: gRPC must also be enabled)")
cmd.Flags().Uint64(FlagStateSyncSnapshotInterval, 0, "State sync snapshot interval")
cmd.Flags().Uint32(FlagStateSyncSnapshotKeepRecent, 2, "State sync snapshot to keep")
cmd.Flags().Bool(FlagDisableIAVLFastNode, false, "Disable fast node for IAVL tree")
cmd.Flags().Int(FlagMempoolMaxTxs, mempool.DefaultMaxTx, "Sets MaxTx value for the app-side mempool")
// support old flags name for backwards compatibility
cmd.Flags().SetNormalizeFunc(func(f *pflag.FlagSet, name string) pflag.NormalizedName {
if name == "with-tendermint" {
name = flagWithComet
}
return pflag.NormalizedName(name)
})
// add support for all CometBFT-specific command line options
cmtcmd.AddNodeFlags(cmd)
if opts.AddFlags != nil {
opts.AddFlags(cmd)
}
return cmd
}
func start(svrCtx *Context, clientCtx client.Context, appCreator types.AppCreator, withCmt bool, opts StartCmdOptions) error {
svrCfg, err := getAndValidateConfig(svrCtx)
if err != nil {
return err
}
app, appCleanupFn, err := startApp(svrCtx, appCreator, opts)
if err != nil {
return err
}
defer appCleanupFn()
metrics, err := startTelemetry(svrCfg)
if err != nil {
return err
}
emitServerInfoMetrics()
if !withCmt {
return startStandAlone(svrCtx, app, opts)
}
return startInProcess(svrCtx, svrCfg, clientCtx, app, metrics, opts)
}
func startStandAlone(svrCtx *Context, app types.Application, opts StartCmdOptions) error {
addr := svrCtx.Viper.GetString(flagAddress)
transport := svrCtx.Viper.GetString(flagTransport)
cmtApp := NewCometABCIWrapper(app)
svr, err := server.NewServer(addr, transport, cmtApp)
if err != nil {
return fmt.Errorf("error creating listener: %v", err)
}
svr.SetLogger(servercmtlog.CometLoggerWrapper{Logger: svrCtx.Logger.With("module", "abci-server")})
g, ctx := getCtx(svrCtx, false)
g.Go(func() error {
if err := svr.Start(); err != nil {
svrCtx.Logger.Error("failed to start out-of-process ABCI server", "err", err)
return err
}
// Wait for the calling process to be canceled or close the provided context,
// so we can gracefully stop the ABCI server.
<-ctx.Done()
svrCtx.Logger.Info("stopping the ABCI server...")
return errors.Join(svr.Stop(), app.Close())
})
return g.Wait()
}
func startInProcess(svrCtx *Context, svrCfg serverconfig.Config, clientCtx client.Context, app types.Application,
metrics *telemetry.Metrics, opts StartCmdOptions,
) error {
cmtCfg := svrCtx.Config
home := cmtCfg.RootDir
gRPCOnly := svrCtx.Viper.GetBool(flagGRPCOnly)
g, ctx := getCtx(svrCtx, true)
if gRPCOnly {
// TODO: Generalize logic so that gRPC only is really in startStandAlone
svrCtx.Logger.Info("starting node in gRPC only mode; CometBFT is disabled")
svrCfg.GRPC.Enable = true
} else {
svrCtx.Logger.Info("starting node with ABCI CometBFT in-process")
tmNode, cleanupFn, err := startCmtNode(ctx, cmtCfg, app, svrCtx)
if err != nil {
return err
}
defer cleanupFn()
// Add the tx service to the gRPC router. We only need to register this
// service if API or gRPC is enabled, and avoid doing so in the general
// case, because it spawns a new local CometBFT RPC client.
if svrCfg.API.Enable || svrCfg.GRPC.Enable {
// Re-assign for making the client available below do not use := to avoid
// shadowing the clientCtx variable.
clientCtx = clientCtx.WithClient(local.New(tmNode))
app.RegisterTxService(clientCtx)
app.RegisterTendermintService(clientCtx)
app.RegisterNodeService(clientCtx, svrCfg)
}
}
grpcSrv, clientCtx, err := startGrpcServer(ctx, g, svrCfg.GRPC, clientCtx, svrCtx, app)
if err != nil {
return err
}
err = startAPIServer(ctx, g, cmtCfg, svrCfg, clientCtx, svrCtx, app, home, grpcSrv, metrics)
if err != nil {
return err
}
if opts.PostSetup != nil {
if err := opts.PostSetup(svrCtx, clientCtx, ctx, g); err != nil {
return err
}
}
// wait for signal capture and gracefully return
// we are guaranteed to be waiting for the "ListenForQuitSignals" goroutine.
return g.Wait()
}
// TODO: Move nodeKey into being created within the function.
func startCmtNode(
ctx context.Context,
cfg *cmtcfg.Config,
app types.Application,
svrCtx *Context,
) (tmNode *node.Node, cleanupFn func(), err error) {
nodeKey, err := p2p.LoadOrGenNodeKey(cfg.NodeKeyFile())
if err != nil {
return nil, cleanupFn, err
}
cmtApp := NewCometABCIWrapper(app)
tmNode, err = node.NewNodeWithContext(
ctx,
cfg,
pvm.LoadOrGenFilePV(cfg.PrivValidatorKeyFile(), cfg.PrivValidatorStateFile()),
nodeKey,
proxy.NewLocalClientCreator(cmtApp),
getGenDocProvider(cfg),
cmtcfg.DefaultDBProvider,
node.DefaultMetricsProvider(cfg.Instrumentation),
servercmtlog.CometLoggerWrapper{Logger: svrCtx.Logger},
)
if err != nil {
return tmNode, cleanupFn, err
}
if err := tmNode.Start(); err != nil {
return tmNode, cleanupFn, err
}
cleanupFn = func() {
if tmNode != nil && tmNode.IsRunning() {
_ = tmNode.Stop()
_ = app.Close()
}
}
return tmNode, cleanupFn, nil
}
func getAndValidateConfig(svrCtx *Context) (serverconfig.Config, error) {
config, err := serverconfig.GetConfig(svrCtx.Viper)
if err != nil {
return config, err
}
if err := config.ValidateBasic(); err != nil {
return config, err
}
return config, nil
}
// returns a function which returns the genesis doc from the genesis file.
func getGenDocProvider(cfg *cmtcfg.Config) func() (*cmttypes.GenesisDoc, error) {
return func() (*cmttypes.GenesisDoc, error) {
appGenesis, err := genutiltypes.AppGenesisFromFile(cfg.GenesisFile())
if err != nil {
return nil, err
}
return appGenesis.ToGenesisDoc()
}
}
func setupTraceWriter(svrCtx *Context) (traceWriter io.WriteCloser, cleanup func(), err error) {
// clean up the traceWriter when the server is shutting down
cleanup = func() {}
traceWriterFile := svrCtx.Viper.GetString(flagTraceStore)
traceWriter, err = openTraceWriter(traceWriterFile)
if err != nil {
return traceWriter, cleanup, err
}
// if flagTraceStore is not used then traceWriter is nil
if traceWriter != nil {
cleanup = func() {
if err = traceWriter.Close(); err != nil {
svrCtx.Logger.Error("failed to close trace writer", "err", err)
}
}
}
return traceWriter, cleanup, nil
}
func startGrpcServer(
ctx context.Context,
g *errgroup.Group,
config serverconfig.GRPCConfig,
clientCtx client.Context,
svrCtx *Context,
app types.Application,
) (*grpc.Server, client.Context, error) {
if !config.Enable {
// return grpcServer as nil if gRPC is disabled
return nil, clientCtx, nil
}
_, port, err := net.SplitHostPort(config.Address)
if err != nil {
return nil, clientCtx, err
}
maxSendMsgSize := config.MaxSendMsgSize
if maxSendMsgSize == 0 {
maxSendMsgSize = serverconfig.DefaultGRPCMaxSendMsgSize
}
maxRecvMsgSize := config.MaxRecvMsgSize
if maxRecvMsgSize == 0 {
maxRecvMsgSize = serverconfig.DefaultGRPCMaxRecvMsgSize
}
grpcAddress := fmt.Sprintf("127.0.0.1:%s", port)
// if gRPC is enabled, configure gRPC client for gRPC gateway
grpcClient, err := grpc.Dial(
grpcAddress,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithDefaultCallOptions(
grpc.ForceCodec(codec.NewProtoCodec(clientCtx.InterfaceRegistry).GRPCCodec()),
grpc.MaxCallRecvMsgSize(maxRecvMsgSize),
grpc.MaxCallSendMsgSize(maxSendMsgSize),
),
)
if err != nil {
return nil, clientCtx, err
}
clientCtx = clientCtx.WithGRPCClient(grpcClient)
svrCtx.Logger.Debug("gRPC client assigned to client context", "target", grpcAddress)
grpcSrv, err := servergrpc.NewGRPCServer(clientCtx, app, config)
if err != nil {
return nil, clientCtx, err
}
// Start the gRPC server in a goroutine. Note, the provided ctx will ensure
// that the server is gracefully shut down.
g.Go(func() error {
return servergrpc.StartGRPCServer(ctx, svrCtx.Logger.With("module", "grpc-server"), config, grpcSrv)
})
return grpcSrv, clientCtx, nil
}
func startAPIServer(
ctx context.Context,
g *errgroup.Group,
cmtCfg *cmtcfg.Config,
svrCfg serverconfig.Config,
clientCtx client.Context,
svrCtx *Context,
app types.Application,
home string,
grpcSrv *grpc.Server,
metrics *telemetry.Metrics,
) error {
if !svrCfg.API.Enable {
return nil
}
clientCtx = clientCtx.WithHomeDir(home)
apiSrv := api.New(clientCtx, svrCtx.Logger.With("module", "api-server"), grpcSrv)
app.RegisterAPIRoutes(apiSrv, svrCfg.API)
if svrCfg.Telemetry.Enabled {
apiSrv.SetTelemetry(metrics)
}
g.Go(func() error {
return apiSrv.Start(ctx, svrCfg)
})
return nil
}
func startTelemetry(cfg serverconfig.Config) (*telemetry.Metrics, error) {
if !cfg.Telemetry.Enabled {
return nil, nil
}
return telemetry.New(cfg.Telemetry)
}
// wrapCPUProfile starts CPU profiling, if enabled, and executes the provided
// callbackFn in a separate goroutine, then will wait for that callback to
// return.
//
// NOTE: We expect the caller to handle graceful shutdown and signal handling.
func wrapCPUProfile(svrCtx *Context, callbackFn func() error) error {
if cpuProfile := svrCtx.Viper.GetString(flagCPUProfile); cpuProfile != "" {
f, err := os.Create(cpuProfile)
if err != nil {
return err
}
svrCtx.Logger.Info("starting CPU profiler", "profile", cpuProfile)
if err := pprof.StartCPUProfile(f); err != nil {
return err
}
defer func() {
svrCtx.Logger.Info("stopping CPU profiler", "profile", cpuProfile)
pprof.StopCPUProfile()
if err := f.Close(); err != nil {
svrCtx.Logger.Info("failed to close cpu-profile file", "profile", cpuProfile, "err", err.Error())
}
}()
}
return callbackFn()
}
// emitServerInfoMetrics emits server info related metrics using application telemetry.
func emitServerInfoMetrics() {
var ls []metrics.Label
versionInfo := version.NewInfo()
if len(versionInfo.GoVersion) > 0 {
ls = append(ls, telemetry.NewLabel("go", versionInfo.GoVersion))
}
if len(versionInfo.CosmosSdkVersion) > 0 {
ls = append(ls, telemetry.NewLabel("version", versionInfo.CosmosSdkVersion))
}
if len(ls) == 0 {
return
}
telemetry.SetGaugeWithLabels([]string{"server", "info"}, 1, ls)
}
func getCtx(svrCtx *Context, block bool) (*errgroup.Group, context.Context) {
ctx, cancelFn := context.WithCancel(context.Background())
g, ctx := errgroup.WithContext(ctx)
// listen for quit signals so the calling parent process can gracefully exit
ListenForQuitSignals(g, block, cancelFn, svrCtx.Logger)
return g, ctx
}
func startApp(svrCtx *Context, appCreator types.AppCreator, opts StartCmdOptions) (app types.Application, cleanupFn func(), err error) {
traceWriter, traceCleanupFn, err := setupTraceWriter(svrCtx)
if err != nil {
return app, traceCleanupFn, err
}
home := svrCtx.Config.RootDir
db, err := opts.DBOpener(home, GetAppDBBackend(svrCtx.Viper))
if err != nil {
return app, traceCleanupFn, err
}
app = appCreator(svrCtx.Logger, db, traceWriter, svrCtx.Viper)
cleanupFn = func() {
traceCleanupFn()
if localErr := app.Close(); localErr != nil {
svrCtx.Logger.Error(localErr.Error())
}
}
return app, cleanupFn, nil
}