Merge pull request #7341 from filecoin-project/mg/feat/panic-reporter
feat: Catch panic to generate report and reraise
This commit is contained in:
commit
95e8b59367
183
build/panic_reporter.go
Normal file
183
build/panic_reporter.go
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
package build
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime/debug"
|
||||||
|
"runtime/pprof"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/icza/backscanner"
|
||||||
|
logging "github.com/ipfs/go-log/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
panicLog = logging.Logger("panic-reporter")
|
||||||
|
defaultJournalTail = 500
|
||||||
|
)
|
||||||
|
|
||||||
|
// PanicReportingPath is the name of the subdir created within the repoPath
|
||||||
|
// path provided to GeneratePanicReport
|
||||||
|
var PanicReportingPath = "panic-reports"
|
||||||
|
|
||||||
|
// PanicReportJournalTail is the number of lines captured from the end of
|
||||||
|
// the lotus journal to be included in the panic report.
|
||||||
|
var PanicReportJournalTail = defaultJournalTail
|
||||||
|
|
||||||
|
// GeneratePanicReport produces a timestamped dump of the application state
|
||||||
|
// for inspection and debugging purposes. Call this function from any place
|
||||||
|
// where a panic or severe error needs to be examined. `persistPath` is the
|
||||||
|
// path where the reports should be saved. `repoPath` is the path where the
|
||||||
|
// journal should be read from. `label` is an optional string to include
|
||||||
|
// next to the report timestamp.
|
||||||
|
func GeneratePanicReport(persistPath, repoPath, label string) {
|
||||||
|
// make sure we always dump the latest logs on the way out
|
||||||
|
// especially since we're probably panicking
|
||||||
|
defer panicLog.Sync() //nolint:errcheck
|
||||||
|
|
||||||
|
if persistPath == "" && repoPath == "" {
|
||||||
|
panicLog.Warn("missing persist and repo paths, aborting panic report creation")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
reportPath := filepath.Join(repoPath, PanicReportingPath, generateReportName(label))
|
||||||
|
if persistPath != "" {
|
||||||
|
reportPath = filepath.Join(persistPath, generateReportName(label))
|
||||||
|
}
|
||||||
|
panicLog.Warnf("generating panic report at %s", reportPath)
|
||||||
|
|
||||||
|
tl := os.Getenv("LOTUS_PANIC_JOURNAL_LOOKBACK")
|
||||||
|
if tl != "" && PanicReportJournalTail == defaultJournalTail {
|
||||||
|
i, err := strconv.Atoi(tl)
|
||||||
|
if err == nil {
|
||||||
|
PanicReportJournalTail = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := os.MkdirAll(reportPath, 0755)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
writeAppVersion(filepath.Join(reportPath, "version"))
|
||||||
|
writeStackTrace(filepath.Join(reportPath, "stacktrace.dump"))
|
||||||
|
writeProfile("goroutines", filepath.Join(reportPath, "goroutines.pprof.gz"))
|
||||||
|
writeProfile("heap", filepath.Join(reportPath, "heap.pprof.gz"))
|
||||||
|
writeJournalTail(PanicReportJournalTail, repoPath, filepath.Join(reportPath, "journal.ndjson"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeAppVersion(file string) {
|
||||||
|
f, err := os.Create(file)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
}
|
||||||
|
defer f.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
versionString := []byte(BuildVersion + BuildTypeString() + CurrentCommit + "\n")
|
||||||
|
if _, err := f.Write(versionString); err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeStackTrace(file string) {
|
||||||
|
f, err := os.Create(file)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
}
|
||||||
|
defer f.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
if _, err := f.Write(debug.Stack()); err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeProfile(profileType string, file string) {
|
||||||
|
p := pprof.Lookup(profileType)
|
||||||
|
if p == nil {
|
||||||
|
panicLog.Warnf("%s profile not available", profileType)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
f, err := os.Create(file)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
if err := p.WriteTo(f, 0); err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeJournalTail(tailLen int, repoPath, file string) {
|
||||||
|
if repoPath == "" {
|
||||||
|
panicLog.Warn("repo path is empty, aborting copy of journal log")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Create(file)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close() //nolint:errcheck
|
||||||
|
|
||||||
|
jPath, err := getLatestJournalFilePath(repoPath)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Warnf("failed getting latest journal: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
j, err := os.OpenFile(jPath, os.O_RDONLY, 0400)
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
js, err := j.Stat()
|
||||||
|
if err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
jScan := backscanner.New(j, int(js.Size()))
|
||||||
|
linesWritten := 0
|
||||||
|
for {
|
||||||
|
if linesWritten > tailLen {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
line, _, err := jScan.LineBytes()
|
||||||
|
if err != nil {
|
||||||
|
if err != io.EOF {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if _, err := f.Write(line); err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if _, err := f.Write([]byte("\n")); err != nil {
|
||||||
|
panicLog.Error(err.Error())
|
||||||
|
break
|
||||||
|
}
|
||||||
|
linesWritten++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getLatestJournalFilePath returns the path of the last non-directory entry,
// in the filename order produced by os.ReadDir, of the "journal" directory
// inside repoPath. It returns an error when the directory cannot be read or
// when it contains no such entry.
func getLatestJournalFilePath(repoPath string) (string, error) {
	journalPath := filepath.Join(repoPath, "journal")
	entries, err := os.ReadDir(journalPath)
	if err != nil {
		return "", err
	}
	// Walk backwards so the last file by name wins; skipping directories and
	// handling the empty case avoids an index-out-of-range panic while a
	// panic report is being generated.
	for i := len(entries) - 1; i >= 0; i-- {
		if entries[i].IsDir() {
			continue
		}
		return filepath.Join(journalPath, entries[i].Name()), nil
	}
	return "", fmt.Errorf("no journal files found in %s", journalPath)
}
|
||||||
|
|
||||||
|
// generateReportName builds the directory name for a panic report in the
// form "report_<label>_<timestamp>". Space characters are removed from the
// label, and the timestamp is the current local time formatted as
// "2006-01-02T150405".
func generateReportName(label string) string {
	const stampLayout = "2006-01-02T150405"

	compactLabel := strings.Replace(label, " ", "", -1)
	stamp := time.Now().Format(stampLayout)
	return fmt.Sprintf("report_%s_%s", compactLabel, stamp)
}
|
@ -15,7 +15,7 @@ const (
|
|||||||
BuildButterflynet = 0x7
|
BuildButterflynet = 0x7
|
||||||
)
|
)
|
||||||
|
|
||||||
func buildType() string {
|
func BuildTypeString() string {
|
||||||
switch BuildType {
|
switch BuildType {
|
||||||
case BuildDefault:
|
case BuildDefault:
|
||||||
return ""
|
return ""
|
||||||
@ -44,5 +44,5 @@ func UserVersion() string {
|
|||||||
return BuildVersion
|
return BuildVersion
|
||||||
}
|
}
|
||||||
|
|
||||||
return BuildVersion + buildType() + CurrentCommit
|
return BuildVersion + BuildTypeString() + CurrentCommit
|
||||||
}
|
}
|
||||||
|
@ -113,6 +113,12 @@ func main() {
|
|||||||
Usage: "use color in display output",
|
Usage: "use color in display output",
|
||||||
DefaultText: "depends on output being a TTY",
|
DefaultText: "depends on output being a TTY",
|
||||||
},
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "panic-reports",
|
||||||
|
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
|
||||||
|
Hidden: true,
|
||||||
|
Value: "~/.lotusminer", // should follow --repo default
|
||||||
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "repo",
|
Name: "repo",
|
||||||
EnvVars: []string{"LOTUS_PATH"},
|
EnvVars: []string{"LOTUS_PATH"},
|
||||||
@ -146,6 +152,14 @@ func main() {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
|
After: func(c *cli.Context) error {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
// Generate report in LOTUS_PATH and re-raise panic
|
||||||
|
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagMinerRepo), c.App.Name)
|
||||||
|
panic(r)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
}
|
}
|
||||||
app.Setup()
|
app.Setup()
|
||||||
app.Metadata["repoType"] = repo.StorageMiner
|
app.Metadata["repoType"] = repo.StorageMiner
|
||||||
|
@ -75,6 +75,12 @@ func main() {
|
|||||||
Value: "~/.lotusworker", // TODO: Consider XDG_DATA_HOME
|
Value: "~/.lotusworker", // TODO: Consider XDG_DATA_HOME
|
||||||
Usage: fmt.Sprintf("Specify worker repo path. flag %s and env WORKER_PATH are DEPRECATION, will REMOVE SOON", FlagWorkerRepoDeprecation),
|
Usage: fmt.Sprintf("Specify worker repo path. flag %s and env WORKER_PATH are DEPRECATION, will REMOVE SOON", FlagWorkerRepoDeprecation),
|
||||||
},
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "panic-reports",
|
||||||
|
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
|
||||||
|
Hidden: true,
|
||||||
|
Value: "~/.lotusworker", // should follow --repo default
|
||||||
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "miner-repo",
|
Name: "miner-repo",
|
||||||
Aliases: []string{"storagerepo"},
|
Aliases: []string{"storagerepo"},
|
||||||
@ -89,6 +95,14 @@ func main() {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
After: func(c *cli.Context) error {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
// Generate report in LOTUS_PATH and re-raise panic
|
||||||
|
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagWorkerRepo), c.App.Name)
|
||||||
|
panic(r)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
Commands: local,
|
Commands: local,
|
||||||
}
|
}
|
||||||
app.Setup()
|
app.Setup()
|
||||||
|
@ -67,6 +67,12 @@ func main() {
|
|||||||
Version: build.UserVersion(),
|
Version: build.UserVersion(),
|
||||||
EnableBashCompletion: true,
|
EnableBashCompletion: true,
|
||||||
Flags: []cli.Flag{
|
Flags: []cli.Flag{
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "panic-reports",
|
||||||
|
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
|
||||||
|
Hidden: true,
|
||||||
|
Value: "~/.lotus", // should follow --repo default
|
||||||
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "repo",
|
Name: "repo",
|
||||||
EnvVars: []string{"LOTUS_PATH"},
|
EnvVars: []string{"LOTUS_PATH"},
|
||||||
@ -84,6 +90,14 @@ func main() {
|
|||||||
},
|
},
|
||||||
cliutil.FlagVeryVerbose,
|
cliutil.FlagVeryVerbose,
|
||||||
},
|
},
|
||||||
|
After: func(c *cli.Context) error {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
// Generate report in LOTUS_PATH and re-raise panic
|
||||||
|
build.GeneratePanicReport(c.String("panic-reports"), c.String("repo"), c.App.Name)
|
||||||
|
panic(r)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
|
||||||
Commands: append(local, lcli.Commands...),
|
Commands: append(local, lcli.Commands...),
|
||||||
}
|
}
|
||||||
|
1
go.mod
1
go.mod
@ -63,6 +63,7 @@ require (
|
|||||||
github.com/hannahhoward/go-pubsub v0.0.0-20200423002714-8d62886cc36e
|
github.com/hannahhoward/go-pubsub v0.0.0-20200423002714-8d62886cc36e
|
||||||
github.com/hashicorp/go-multierror v1.1.1
|
github.com/hashicorp/go-multierror v1.1.1
|
||||||
github.com/hashicorp/golang-lru v0.5.4
|
github.com/hashicorp/golang-lru v0.5.4
|
||||||
|
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94
|
||||||
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
|
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
|
||||||
github.com/ipfs/bbloom v0.0.4
|
github.com/ipfs/bbloom v0.0.4
|
||||||
github.com/ipfs/go-bitswap v0.3.4
|
github.com/ipfs/go-bitswap v0.3.4
|
||||||
|
4
go.sum
4
go.sum
@ -605,6 +605,10 @@ github.com/iancoleman/orderedmap v0.1.0 h1:2orAxZBJsvimgEBmMWfXaFlzSG2fbQil5qzP3
|
|||||||
github.com/iancoleman/orderedmap v0.1.0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
github.com/iancoleman/orderedmap v0.1.0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||||
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428/go.mod h1:uhpZMVGznybq1itEKXj6RYw9I71qK4kH+OGMjRC4KEo=
|
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428/go.mod h1:uhpZMVGznybq1itEKXj6RYw9I71qK4kH+OGMjRC4KEo=
|
||||||
|
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94 h1:9tcYMdi+7Rb1y0E9Del1DRHui7Ne3za5lLw6CjMJv/M=
|
||||||
|
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94/go.mod h1:GYeBD1CF7AqnKZK+UCytLcY3G+UKo0ByXX/3xfdNyqQ=
|
||||||
|
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
|
||||||
|
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA=
|
||||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||||
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
|
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
|
||||||
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
|
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
|
||||||
|
Loading…
Reference in New Issue
Block a user