From 4d02519ec0a3fa5786e70b6c8a8b01dcd362cea0 Mon Sep 17 00:00:00 2001 From: yihuang Date: Thu, 9 Feb 2023 07:53:27 +0800 Subject: [PATCH] feat: support profiling block replay during abci handshake (#14953) ## Description By default, the signal trap is not set up during the ABCI handshake, so you can't profile at this stage, even though replaying blocks during the handshake is an interesting way to profile production block data. --- ### Author Checklist *All items are required. Please add a note to the item if the item is not applicable and please add links to any relevant follow-up issues.* I have... - [ ] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] added `!` to the type prefix if API or client breaking change - [ ] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/main/CONTRIBUTING.md#pr-targeting)) - [ ] provided a link to the relevant issue or specification - [ ] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/main/docs/docs/building-modules) - [ ] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/main/CONTRIBUTING.md#testing) - [ ] added a changelog entry to `CHANGELOG.md` - [ ] included comments for [documenting Go code](https://blog.golang.org/godoc) - [ ] updated the relevant documentation or specification - [ ] reviewed "Files changed" and left comments if necessary - [ ] confirmed all CI checks have passed ### Reviewers Checklist *All items are required. Please add a note if the item is not applicable and please add your handle next to the items reviewed if you only reviewed selected items.* I have... 
- [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] confirmed `!` in the type prefix if API or client breaking change - [ ] confirmed all author checklist items have been addressed - [ ] reviewed state machine logic - [ ] reviewed API design and naming - [ ] reviewed documentation is accurate - [ ] reviewed tests and test coverage - [ ] manually tested (if applicable) --- CHANGELOG.md | 1 + server/start.go | 85 +++++++++++++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e718ec5015..d78b4d5f85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -144,6 +144,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ * [#14406](https://github.com/cosmos/cosmos-sdk/issues/14406) Migrate usage of types/store.go to store/types/.. * (x/staking) [#14590](https://github.com/cosmos/cosmos-sdk/pull/14590) Return undelegate amount in MsgUndelegateResponse * (tools) [#14793](https://github.com/cosmos/cosmos-sdk/pull/14793) Dockerfile optimization. +* (cli) [#14953](https://github.com/cosmos/cosmos-sdk/pull/14953) Enable profiling block replay during abci handshake with `--cpu-profile`. ### State Machine Breaking diff --git a/server/start.go b/server/start.go index d0ebec393d..7ad4ca73ad 100644 --- a/server/start.go +++ b/server/start.go @@ -135,11 +135,15 @@ is performed. Note, when enabled, gRPC will also be automatically enabled. 
withCMT, _ := cmd.Flags().GetBool(flagWithComet) if !withCMT { serverCtx.Logger.Info("starting ABCI without CometBFT") - return startStandAlone(serverCtx, appCreator) + return wrapCPUProfile(serverCtx, func() error { + return startStandAlone(serverCtx, appCreator) + }) } // amino is needed here for backwards compatibility of REST routes - err = startInProcess(serverCtx, clientCtx, appCreator) + err = wrapCPUProfile(serverCtx, func() error { + return startInProcess(serverCtx, clientCtx, appCreator) + }) errCode, ok := err.(ErrorCode) if !ok { return err @@ -259,27 +263,6 @@ func startStandAlone(ctx *Context, appCreator types.AppCreator) error { func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.AppCreator) error { cfg := ctx.Config home := cfg.RootDir - var cpuProfileCleanup func() - - if cpuProfile := ctx.Viper.GetString(flagCPUProfile); cpuProfile != "" { - f, err := os.Create(cpuProfile) - if err != nil { - return err - } - - ctx.Logger.Info("starting CPU profiler", "profile", cpuProfile) - if err := pprof.StartCPUProfile(f); err != nil { - return err - } - - cpuProfileCleanup = func() { - ctx.Logger.Info("stopping CPU profiler", "profile", cpuProfile) - pprof.StopCPUProfile() - if err := f.Close(); err != nil { - ctx.Logger.Info("failed to close cpu-profile file", "profile", cpuProfile, "err", err.Error()) - } - } - } db, err := openDB(home, GetAppDBBackend(ctx.Viper)) if err != nil { @@ -292,16 +275,11 @@ func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.App return err } - // Clean up the traceWriter in the cpuProfileCleanup routine that is invoked - // when the server is shutting down. - fn := cpuProfileCleanup - cpuProfileCleanup = func() { - if fn != nil { - fn() - } - - // if flagTraceStore is not used then traceWriter is nil - if traceWriter != nil { + // Clean up the traceWriter when the server is shutting down. 
+ var traceWriterCleanup func() + // if flagTraceStore is not used then traceWriter is nil + if traceWriter != nil { + traceWriterCleanup = func() { if err = traceWriter.Close(); err != nil { ctx.Logger.Error("failed to close trace writer", "err", err) } @@ -474,8 +452,8 @@ func startInProcess(ctx *Context, clientCtx client.Context, appCreator types.App _ = tmNode.Stop() } - if cpuProfileCleanup != nil { - cpuProfileCleanup() + if traceWriterCleanup != nil { + traceWriterCleanup() } if apiSrv != nil { @@ -495,3 +473,40 @@ func startTelemetry(cfg serverconfig.Config) (*telemetry.Metrics, error) { } return telemetry.New(cfg.Telemetry) } + +// wrapCPUProfile runs callback in a goroutine, then wait for quit signals. +func wrapCPUProfile(ctx *Context, callback func() error) error { + if cpuProfile := ctx.Viper.GetString(flagCPUProfile); cpuProfile != "" { + f, err := os.Create(cpuProfile) + if err != nil { + return err + } + + ctx.Logger.Info("starting CPU profiler", "profile", cpuProfile) + if err := pprof.StartCPUProfile(f); err != nil { + return err + } + + defer func() { + ctx.Logger.Info("stopping CPU profiler", "profile", cpuProfile) + pprof.StopCPUProfile() + if err := f.Close(); err != nil { + ctx.Logger.Info("failed to close cpu-profile file", "profile", cpuProfile, "err", err.Error()) + } + }() + } + + errCh := make(chan error) + go func() { + errCh <- callback() + }() + + select { + case err := <-errCh: + return err + + case <-time.After(types.ServerStartTime): + } + + return WaitForQuitSignals() +}