Merge pull request #7341 from filecoin-project/mg/feat/panic-reporter

feat: Catch panic to generate report and reraise
This commit is contained in:
Łukasz Magiera 2021-10-01 10:50:19 +01:00 committed by GitHub
commit 95e8b59367
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 232 additions and 2 deletions

183
build/panic_reporter.go Normal file
View File

@ -0,0 +1,183 @@
package build
import (
"fmt"
"io"
"os"
"path/filepath"
"runtime/debug"
"runtime/pprof"
"strconv"
"strings"
"time"
"github.com/icza/backscanner"
logging "github.com/ipfs/go-log/v2"
)
var (
panicLog = logging.Logger("panic-reporter")
defaultJournalTail = 500
)
// PanicReportingPath is the name of the subdir created within the repoPath
// path provided to GeneratePanicReport
var PanicReportingPath = "panic-reports"
// PanicReportJournalTail is the number of lines captured from the end of
// the lotus journal to be included in the panic report.
var PanicReportJournalTail = defaultJournalTail
// GeneratePanicReport produces a timestamped dump of the application state
// for inspection and debugging purposes. Call this function from any place
// where a panic or severe error needs to be examined. `persistPath` is the
// path where the reports should be saved. `repoPath` is the path where the
// journal should be read from. `label` is an optional string to include
// next to the report timestamp.
func GeneratePanicReport(persistPath, repoPath, label string) {
// make sure we always dump the latest logs on the way out
// especially since we're probably panicking
defer panicLog.Sync() //nolint:errcheck
if persistPath == "" && repoPath == "" {
panicLog.Warn("missing persist and repo paths, aborting panic report creation")
return
}
reportPath := filepath.Join(repoPath, PanicReportingPath, generateReportName(label))
if persistPath != "" {
reportPath = filepath.Join(persistPath, generateReportName(label))
}
panicLog.Warnf("generating panic report at %s", reportPath)
tl := os.Getenv("LOTUS_PANIC_JOURNAL_LOOKBACK")
if tl != "" && PanicReportJournalTail == defaultJournalTail {
i, err := strconv.Atoi(tl)
if err == nil {
PanicReportJournalTail = i
}
}
err := os.MkdirAll(reportPath, 0755)
if err != nil {
panicLog.Error(err.Error())
return
}
writeAppVersion(filepath.Join(reportPath, "version"))
writeStackTrace(filepath.Join(reportPath, "stacktrace.dump"))
writeProfile("goroutines", filepath.Join(reportPath, "goroutines.pprof.gz"))
writeProfile("heap", filepath.Join(reportPath, "heap.pprof.gz"))
writeJournalTail(PanicReportJournalTail, repoPath, filepath.Join(reportPath, "journal.ndjson"))
}
func writeAppVersion(file string) {
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
}
defer f.Close() //nolint:errcheck
versionString := []byte(BuildVersion + BuildTypeString() + CurrentCommit + "\n")
if _, err := f.Write(versionString); err != nil {
panicLog.Error(err.Error())
}
}
func writeStackTrace(file string) {
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
}
defer f.Close() //nolint:errcheck
if _, err := f.Write(debug.Stack()); err != nil {
panicLog.Error(err.Error())
}
}
func writeProfile(profileType string, file string) {
p := pprof.Lookup(profileType)
if p == nil {
panicLog.Warnf("%s profile not available", profileType)
return
}
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
return
}
defer f.Close() //nolint:errcheck
if err := p.WriteTo(f, 0); err != nil {
panicLog.Error(err.Error())
}
}
func writeJournalTail(tailLen int, repoPath, file string) {
if repoPath == "" {
panicLog.Warn("repo path is empty, aborting copy of journal log")
return
}
f, err := os.Create(file)
if err != nil {
panicLog.Error(err.Error())
return
}
defer f.Close() //nolint:errcheck
jPath, err := getLatestJournalFilePath(repoPath)
if err != nil {
panicLog.Warnf("failed getting latest journal: %s", err.Error())
return
}
j, err := os.OpenFile(jPath, os.O_RDONLY, 0400)
if err != nil {
panicLog.Error(err.Error())
return
}
js, err := j.Stat()
if err != nil {
panicLog.Error(err.Error())
return
}
jScan := backscanner.New(j, int(js.Size()))
linesWritten := 0
for {
if linesWritten > tailLen {
break
}
line, _, err := jScan.LineBytes()
if err != nil {
if err != io.EOF {
panicLog.Error(err.Error())
}
break
}
if _, err := f.Write(line); err != nil {
panicLog.Error(err.Error())
break
}
if _, err := f.Write([]byte("\n")); err != nil {
panicLog.Error(err.Error())
break
}
linesWritten++
}
}
func getLatestJournalFilePath(repoPath string) (string, error) {
journalPath := filepath.Join(repoPath, "journal")
entries, err := os.ReadDir(journalPath)
if err != nil {
return "", err
}
return filepath.Join(journalPath, entries[len(entries)-1].Name()), nil
}
func generateReportName(label string) string {
label = strings.ReplaceAll(label, " ", "")
return fmt.Sprintf("report_%s_%s", label, time.Now().Format("2006-01-02T150405"))
}

View File

@ -15,7 +15,7 @@ const (
BuildButterflynet = 0x7
)
func buildType() string {
func BuildTypeString() string {
switch BuildType {
case BuildDefault:
return ""
@ -44,5 +44,5 @@ func UserVersion() string {
return BuildVersion
}
return BuildVersion + buildType() + CurrentCommit
return BuildVersion + BuildTypeString() + CurrentCommit
}

View File

@ -113,6 +113,12 @@ func main() {
Usage: "use color in display output",
DefaultText: "depends on output being a TTY",
},
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotusminer", // should follow --repo default
},
&cli.StringFlag{
Name: "repo",
EnvVars: []string{"LOTUS_PATH"},
@ -146,6 +152,14 @@ func main() {
}
return nil
},
After: func(c *cli.Context) error {
if r := recover(); r != nil {
// Generate report in LOTUS_PATH and re-raise panic
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagMinerRepo), c.App.Name)
panic(r)
}
return nil
},
}
app.Setup()
app.Metadata["repoType"] = repo.StorageMiner

View File

@ -75,6 +75,12 @@ func main() {
Value: "~/.lotusworker", // TODO: Consider XDG_DATA_HOME
Usage: fmt.Sprintf("Specify worker repo path. flag %s and env WORKER_PATH are DEPRECATION, will REMOVE SOON", FlagWorkerRepoDeprecation),
},
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotusworker", // should follow --repo default
},
&cli.StringFlag{
Name: "miner-repo",
Aliases: []string{"storagerepo"},
@ -89,6 +95,14 @@ func main() {
},
},
After: func(c *cli.Context) error {
if r := recover(); r != nil {
// Generate report in LOTUS_PATH and re-raise panic
build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagWorkerRepo), c.App.Name)
panic(r)
}
return nil
},
Commands: local,
}
app.Setup()

View File

@ -67,6 +67,12 @@ func main() {
Version: build.UserVersion(),
EnableBashCompletion: true,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "panic-reports",
EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"},
Hidden: true,
Value: "~/.lotus", // should follow --repo default
},
&cli.StringFlag{
Name: "repo",
EnvVars: []string{"LOTUS_PATH"},
@ -84,6 +90,14 @@ func main() {
},
cliutil.FlagVeryVerbose,
},
After: func(c *cli.Context) error {
if r := recover(); r != nil {
// Generate report in LOTUS_PATH and re-raise panic
build.GeneratePanicReport(c.String("panic-reports"), c.String("repo"), c.App.Name)
panic(r)
}
return nil
},
Commands: append(local, lcli.Commands...),
}

1
go.mod
View File

@ -63,6 +63,7 @@ require (
github.com/hannahhoward/go-pubsub v0.0.0-20200423002714-8d62886cc36e
github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/golang-lru v0.5.4
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d
github.com/ipfs/bbloom v0.0.4
github.com/ipfs/go-bitswap v0.3.4

4
go.sum
View File

@ -605,6 +605,10 @@ github.com/iancoleman/orderedmap v0.1.0 h1:2orAxZBJsvimgEBmMWfXaFlzSG2fbQil5qzP3
github.com/iancoleman/orderedmap v0.1.0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428/go.mod h1:uhpZMVGznybq1itEKXj6RYw9I71qK4kH+OGMjRC4KEo=
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94 h1:9tcYMdi+7Rb1y0E9Del1DRHui7Ne3za5lLw6CjMJv/M=
github.com/icza/backscanner v0.0.0-20210726202459-ac2ffc679f94/go.mod h1:GYeBD1CF7AqnKZK+UCytLcY3G+UKo0ByXX/3xfdNyqQ=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d h1:/WZQPMZNsjZ7IlCpsLGdQBINg5bxKQ1K1sh6awxLtkA=
github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=