Merge pull request #7341 from filecoin-project/mg/feat/panic-reporter
feat: Catch panic to generate report and reraise
This commit is contained in:
commit
95e8b59367
183
build/panic_reporter.go
Normal file
183
build/panic_reporter.go
Normal file
@ -0,0 +1,183 @@
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"runtime/pprof"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/icza/backscanner"
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
panicLog = logging.Logger("panic-reporter")
|
||||
defaultJournalTail = 500
|
||||
)
|
||||
|
||||
// PanicReportingPath is the name of the subdir created within the repoPath
|
||||
// path provided to GeneratePanicReport
|
||||
var PanicReportingPath = "panic-reports"
|
||||
|
||||
// PanicReportJournalTail is the number of lines captured from the end of
|
||||
// the lotus journal to be included in the panic report.
|
||||
var PanicReportJournalTail = defaultJournalTail
|
||||
|
||||
// GeneratePanicReport produces a timestamped dump of the application state
|
||||
// for inspection and debugging purposes. Call this function from any place
|
||||
// where a panic or severe error needs to be examined. `persistPath` is the
|
||||
// path where the reports should be saved. `repoPath` is the path where the
|
||||
// journal should be read from. `label` is an optional string to include
|
||||
// next to the report timestamp.
|
||||
func GeneratePanicReport(persistPath, repoPath, label string) {
|
||||
// make sure we always dump the latest logs on the way out
|
||||
// especially since we're probably panicking
|
||||
defer panicLog.Sync() //nolint:errcheck
|
||||
|
||||
if persistPath == "" && repoPath == "" {
|
||||
panicLog.Warn("missing persist and repo paths, aborting panic report creation")
|
||||
return
|
||||
}
|
||||
|
||||
reportPath := filepath.Join(repoPath, PanicReportingPath, generateReportName(label))
|
||||
if persistPath != "" {
|
||||
reportPath = filepath.Join(persistPath, generateReportName(label))
|
||||
}
|
||||
panicLog.Warnf("generating panic report at %s", reportPath)
|
||||
|
||||
tl := os.Getenv("LOTUS_PANIC_JOURNAL_LOOKBACK")
|
||||
if tl != "" && PanicReportJournalTail == defaultJournalTail {
|
||||
i, err := strconv.Atoi(tl)
|
||||
if err == nil {
|
||||
PanicReportJournalTail = i
|
||||
}
|
||||
}
|
||||
|
||||
err := os.MkdirAll(reportPath, 0755)
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
writeAppVersion(filepath.Join(reportPath, "version"))
|
||||
writeStackTrace(filepath.Join(reportPath, "stacktrace.dump"))
|
||||
writeProfile("goroutines", filepath.Join(reportPath, "goroutines.pprof.gz"))
|
||||
writeProfile("heap", filepath.Join(reportPath, "heap.pprof.gz"))
|
||||
writeJournalTail(PanicReportJournalTail, repoPath, filepath.Join(reportPath, "journal.ndjson"))
|
||||
}
|
||||
|
||||
func writeAppVersion(file string) {
|
||||
f, err := os.Create(file)
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
}
|
||||
defer f.Close() //nolint:errcheck
|
||||
|
||||
versionString := []byte(BuildVersion + BuildTypeString() + CurrentCommit + "\n")
|
||||
if _, err := f.Write(versionString); err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func writeStackTrace(file string) {
|
||||
f, err := os.Create(file)
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
}
|
||||
defer f.Close() //nolint:errcheck
|
||||
|
||||
if _, err := f.Write(debug.Stack()); err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func writeProfile(profileType string, file string) {
|
||||
p := pprof.Lookup(profileType)
|
||||
if p == nil {
|
||||
panicLog.Warnf("%s profile not available", profileType)
|
||||
return
|
||||
}
|
||||
f, err := os.Create(file)
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
return
|
||||
}
|
||||
defer f.Close() //nolint:errcheck
|
||||
|
||||
if err := p.WriteTo(f, 0); err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func writeJournalTail(tailLen int, repoPath, file string) {
|
||||
if repoPath == "" {
|
||||
panicLog.Warn("repo path is empty, aborting copy of journal log")
|
||||
return
|
||||
}
|
||||
|
||||
f, err := os.Create(file)
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
return
|
||||
}
|
||||
defer f.Close() //nolint:errcheck
|
||||
|
||||
jPath, err := getLatestJournalFilePath(repoPath)
|
||||
if err != nil {
|
||||
panicLog.Warnf("failed getting latest journal: %s", err.Error())
|
||||
return
|
||||
}
|
||||
j, err := os.OpenFile(jPath, os.O_RDONLY, 0400)
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
return
|
||||
}
|
||||
js, err := j.Stat()
|
||||
if err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
return
|
||||
}
|
||||
jScan := backscanner.New(j, int(js.Size()))
|
||||
linesWritten := 0
|
||||
for {
|
||||
if linesWritten > tailLen {
|
||||
break
|
||||
}
|
||||
line, _, err := jScan.LineBytes()
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
panicLog.Error(err.Error())
|
||||
}
|
||||
break
|
||||
}
|
||||
if _, err := f.Write(line); err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
break
|
||||
}
|
||||
if _, err := f.Write([]byte("\n")); err != nil {
|
||||
panicLog.Error(err.Error())
|
||||
break
|
||||
}
|
||||
linesWritten++
|
||||
}
|
||||
}
|
||||
|
||||
// getLatestJournalFilePath returns the path of the last non-directory entry,
// in filename order, of the "journal" directory inside `repoPath`.
//
// An error is returned when the directory cannot be read or when it holds no
// such entry (for example, when it is empty).
func getLatestJournalFilePath(repoPath string) (string, error) {
	journalPath := filepath.Join(repoPath, "journal")
	entries, err := os.ReadDir(journalPath)
	if err != nil {
		return "", err
	}
	// os.ReadDir returns entries sorted by filename, so walk from the end and
	// take the first one that is not a directory.
	for i := len(entries) - 1; i >= 0; i-- {
		if entries[i].IsDir() {
			continue
		}
		return filepath.Join(journalPath, entries[i].Name()), nil
	}
	return "", fmt.Errorf("no journal files found in %s", journalPath)
}
|
||||
|
||||
// generateReportName builds the name of a report directory: the literal
// "report", the label with all spaces stripped, and the current local time
// to the second, joined by underscores.
func generateReportName(label string) string {
	const stampLayout = "2006-01-02T150405"

	compactLabel := strings.ReplaceAll(label, " ", "")
	stamp := time.Now().Format(stampLayout)
	return fmt.Sprintf("report_%s_%s", compactLabel, stamp)
}
|
@ -15,7 +15,7 @@ const (
|
||||
BuildButterflynet = 0x7
|
||||
)
|
||||
|
||||
func buildType() string {
|
||||
func BuildTypeString() string {
|
||||
switch BuildType {
|
||||
case BuildDefault:
|
||||
return ""
|
||||
@ -44,5 +44,5 @@ func UserVersion() string {
|
||||
return BuildVersion
|
||||
}
|
||||
|
||||
return BuildVersion + buildType() + CurrentCommit
|
||||
return BuildVersion + BuildTypeString() + CurrentCommit
|
||||
}
|
||||
|
@ -113,6 +113,12 @@ func main() {
|
||||
Usage: "use color in display output",
|
||||
DefaultText: "depends on output being a TTY",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "panic-reports",
|
||||
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
|
||||
Hidden: true,
|
||||
Value: "~/.lotusminer", // should follow --repo default
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "repo",
|
||||
EnvVars: []string{"LOTUS_PATH"},
|
||||
@ -146,6 +152,14 @@ func main() {
|
||||
}
|
||||
return nil
|
||||
},
|
||||
After: func(c *cli.Context) error {
|
||||
if r := recover(); r != nil {
|
||||
// Generate report in LOTUS_PATH and re-raise panic
|
||||
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagMinerRepo), c.App.Name)
|
||||
panic(r)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
app.Setup()
|
||||
app.Metadata["repoType"] = repo.StorageMiner
|
||||
|
@ -75,6 +75,12 @@ func main() {
|
||||
Value: "~/.lotusworker", // TODO: Consider XDG_DATA_HOME
|
||||
Usage: fmt.Sprintf("Specify worker repo path. flag %s and env WORKER_PATH are DEPRECATION, will REMOVE SOON", FlagWorkerRepoDeprecation),
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "panic-reports",
|
||||
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
|
||||
Hidden: true,
|
||||
Value: "~/.lotusworker", // should follow --repo default
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "miner-repo",
|
||||
Aliases: []string{"storagerepo"},
|
||||
@ -89,6 +95,14 @@ func main() {
|
||||
},
|
||||
},
|
||||
|
||||
After: func(c *cli.Context) error {
|
||||
if r := recover(); r != nil {
|
||||
// Generate report in LOTUS_PATH and re-raise panic
|
||||
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagWorkerRepo), c.App.Name)
|
||||
panic(r)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
Commands: local,
|
||||
}
|
||||
app.Setup()
|
||||
|
@ -67,6 +67,12 @@ func main() {
|
||||
Version: build.UserVersion(),
|
||||
EnableBashCompletion: true,
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "panic-reports",
|
||||
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
|
||||
Hidden: true,
|
||||
Value: "~/.lotus", // should follow --repo default
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "repo",
|
||||
EnvVars: []string{"LOTUS_PATH"},
|
||||
@ -84,6 +90,14 @@ func main() {
|
||||
},
|
||||
cliutil.FlagVeryVerbose,
|
||||
},
|
||||
After: func(c *cli.Context) error {
|
||||
if r := recover(); r != nil {
|
||||
// Generate report in LOTUS_PATH and re-raise panic
|
||||
build.GeneratePanicReport(c.String("panic-reports"), c.String("repo"), c.App.Name)
|
||||
panic(r)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
|
||||
Commands: append(local, lcli.Commands...),
|
||||
}
|
||||
|
1
go.mod
1
go.mod
@ -63,6 +63,7 @@ require (
|
||||
github.com/hannahhoward/go-pubsub v0.0.0-20200423002714-8d62886cc36e
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/hashicorp/golang-lru v0.5.4
|
||||
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94
|
||||
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
|
||||
github.com/ipfs/bbloom v0.0.4
|
||||
github.com/ipfs/go-bitswap v0.3.4
|
||||
|
4
go.sum
4
go.sum
@ -605,6 +605,10 @@ github.com/iancoleman/orderedmap v0.1.0 h1:2orAxZBJsvimgEBmMWfXaFlzSG2fbQil5qzP3
|
||||
github.com/iancoleman/orderedmap v0.1.0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428/go.mod h1:uhpZMVGznybq1itEKXj6RYw9I71qK4kH+OGMjRC4KEo=
|
||||
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94 h1:9tcYMdi+7Rb1y0E9Del1DRHui7Ne3za5lLw6CjMJv/M=
|
||||
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94/go.mod h1:GYeBD1CF7AqnKZK+UCytLcY3G+UKo0ByXX/3xfdNyqQ=
|
||||
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
|
||||
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA=
|
||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
|
||||
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
|
||||
|
Loading…
Reference in New Issue
Block a user