Add pprof labels in processes and for lifecycles (#19202)
Use pprof labelling to help identify goroutines with stacks.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
e48f3b0527
commit
5fe764b1eb
@ -6,6 +6,7 @@ package graceful
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"runtime/pprof"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -62,7 +63,6 @@ type WithCallback func(callback func())
|
|||||||
// Similarly the callback function provided to atTerminate must return once termination is complete.
|
// Similarly the callback function provided to atTerminate must return once termination is complete.
|
||||||
// Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals
|
// Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals
|
||||||
// - users must therefore be careful to only call these as necessary.
|
// - users must therefore be careful to only call these as necessary.
|
||||||
// If run is not expected to run indefinitely RunWithShutdownChan is likely to be more appropriate.
|
|
||||||
type RunnableWithShutdownFns func(atShutdown, atTerminate func(func()))
|
type RunnableWithShutdownFns func(atShutdown, atTerminate func(func()))
|
||||||
|
|
||||||
// RunWithShutdownFns takes a function that has both atShutdown and atTerminate callbacks
|
// RunWithShutdownFns takes a function that has both atShutdown and atTerminate callbacks
|
||||||
@ -70,7 +70,6 @@ type RunnableWithShutdownFns func(atShutdown, atTerminate func(func()))
|
|||||||
// Similarly the callback function provided to atTerminate must return once termination is complete.
|
// Similarly the callback function provided to atTerminate must return once termination is complete.
|
||||||
// Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals
|
// Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals
|
||||||
// - users must therefore be careful to only call these as necessary.
|
// - users must therefore be careful to only call these as necessary.
|
||||||
// If run is not expected to run indefinitely RunWithShutdownChan is likely to be more appropriate.
|
|
||||||
func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) {
|
func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) {
|
||||||
g.runningServerWaitGroup.Add(1)
|
g.runningServerWaitGroup.Add(1)
|
||||||
defer g.runningServerWaitGroup.Done()
|
defer g.runningServerWaitGroup.Done()
|
||||||
@ -98,32 +97,6 @@ func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunnableWithShutdownChan is a runnable with functions to run at shutdown and terminate.
|
|
||||||
// After the atShutdown channel is closed, the main function must return once shutdown is complete.
|
|
||||||
// (Optionally IsHammer may be waited for instead however, this should be avoided if possible.)
|
|
||||||
// The callback function provided to atTerminate must return once termination is complete.
|
|
||||||
// Please note that use of the atTerminate function will create a go-routine that will wait till terminate - users must therefore be careful to only call this as necessary.
|
|
||||||
type RunnableWithShutdownChan func(atShutdown <-chan struct{}, atTerminate WithCallback)
|
|
||||||
|
|
||||||
// RunWithShutdownChan takes a function that has channel to watch for shutdown and atTerminate callbacks
|
|
||||||
// After the atShutdown channel is closed, the main function must return once shutdown is complete.
|
|
||||||
// (Optionally IsHammer may be waited for instead however, this should be avoided if possible.)
|
|
||||||
// The callback function provided to atTerminate must return once termination is complete.
|
|
||||||
// Please note that use of the atTerminate function will create a go-routine that will wait till terminate - users must therefore be careful to only call this as necessary.
|
|
||||||
func (g *Manager) RunWithShutdownChan(run RunnableWithShutdownChan) {
|
|
||||||
g.runningServerWaitGroup.Add(1)
|
|
||||||
defer g.runningServerWaitGroup.Done()
|
|
||||||
defer func() {
|
|
||||||
if err := recover(); err != nil {
|
|
||||||
log.Critical("PANIC during RunWithShutdownChan: %v\nStacktrace: %s", err, log.Stack(2))
|
|
||||||
g.doShutdown()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
run(g.IsShutdown(), func(atTerminate func()) {
|
|
||||||
g.RunAtTerminate(atTerminate)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// RunWithShutdownContext takes a function that has a context to watch for shutdown.
|
// RunWithShutdownContext takes a function that has a context to watch for shutdown.
|
||||||
// After the provided context is Done(), the main function must return once shutdown is complete.
|
// After the provided context is Done(), the main function must return once shutdown is complete.
|
||||||
// (Optionally the HammerContext may be obtained and waited for however, this should be avoided if possible.)
|
// (Optionally the HammerContext may be obtained and waited for however, this should be avoided if possible.)
|
||||||
@ -136,7 +109,9 @@ func (g *Manager) RunWithShutdownContext(run func(context.Context)) {
|
|||||||
g.doShutdown()
|
g.doShutdown()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
run(g.ShutdownContext())
|
ctx := g.ShutdownContext()
|
||||||
|
pprof.SetGoroutineLabels(ctx) // We don't have a label to restore back to but I think this is fine
|
||||||
|
run(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunAtTerminate adds to the terminate wait group and creates a go-routine to run the provided function at termination
|
// RunAtTerminate adds to the terminate wait group and creates a go-routine to run the provided function at termination
|
||||||
@ -198,6 +173,8 @@ func (g *Manager) doShutdown() {
|
|||||||
}
|
}
|
||||||
g.lock.Lock()
|
g.lock.Lock()
|
||||||
g.shutdownCtxCancel()
|
g.shutdownCtxCancel()
|
||||||
|
atShutdownCtx := pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "post-shutdown"))
|
||||||
|
pprof.SetGoroutineLabels(atShutdownCtx)
|
||||||
for _, fn := range g.toRunAtShutdown {
|
for _, fn := range g.toRunAtShutdown {
|
||||||
go fn()
|
go fn()
|
||||||
}
|
}
|
||||||
@ -214,7 +191,7 @@ func (g *Manager) doShutdown() {
|
|||||||
g.doTerminate()
|
g.doTerminate()
|
||||||
g.WaitForTerminate()
|
g.WaitForTerminate()
|
||||||
g.lock.Lock()
|
g.lock.Lock()
|
||||||
g.doneCtxCancel()
|
g.managerCtxCancel()
|
||||||
g.lock.Unlock()
|
g.lock.Unlock()
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
@ -227,6 +204,8 @@ func (g *Manager) doHammerTime(d time.Duration) {
|
|||||||
default:
|
default:
|
||||||
log.Warn("Setting Hammer condition")
|
log.Warn("Setting Hammer condition")
|
||||||
g.hammerCtxCancel()
|
g.hammerCtxCancel()
|
||||||
|
atHammerCtx := pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "post-hammer"))
|
||||||
|
pprof.SetGoroutineLabels(atHammerCtx)
|
||||||
for _, fn := range g.toRunAtHammer {
|
for _, fn := range g.toRunAtHammer {
|
||||||
go fn()
|
go fn()
|
||||||
}
|
}
|
||||||
@ -244,6 +223,9 @@ func (g *Manager) doTerminate() {
|
|||||||
default:
|
default:
|
||||||
log.Warn("Terminating")
|
log.Warn("Terminating")
|
||||||
g.terminateCtxCancel()
|
g.terminateCtxCancel()
|
||||||
|
atTerminateCtx := pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "post-terminate"))
|
||||||
|
pprof.SetGoroutineLabels(atTerminateCtx)
|
||||||
|
|
||||||
for _, fn := range g.toRunAtTerminate {
|
for _, fn := range g.toRunAtTerminate {
|
||||||
go fn()
|
go fn()
|
||||||
}
|
}
|
||||||
@ -331,20 +313,20 @@ func (g *Manager) InformCleanup() {
|
|||||||
|
|
||||||
// Done allows the manager to be viewed as a context.Context, it returns a channel that is closed when the server is finished terminating
|
// Done allows the manager to be viewed as a context.Context, it returns a channel that is closed when the server is finished terminating
|
||||||
func (g *Manager) Done() <-chan struct{} {
|
func (g *Manager) Done() <-chan struct{} {
|
||||||
return g.doneCtx.Done()
|
return g.managerCtx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Err allows the manager to be viewed as a context.Context done at Terminate
|
// Err allows the manager to be viewed as a context.Context done at Terminate
|
||||||
func (g *Manager) Err() error {
|
func (g *Manager) Err() error {
|
||||||
return g.doneCtx.Err()
|
return g.managerCtx.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Value allows the manager to be viewed as a context.Context done at Terminate
|
// Value allows the manager to be viewed as a context.Context done at Terminate
|
||||||
func (g *Manager) Value(key interface{}) interface{} {
|
func (g *Manager) Value(key interface{}) interface{} {
|
||||||
return g.doneCtx.Value(key)
|
return g.managerCtx.Value(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Deadline allows the manager to be viewed as a context.Context; it returns the Deadline of the manager's underlying context, as there is no fixed Deadline for the manager itself
|
// Deadline allows the manager to be viewed as a context.Context; it returns the Deadline of the manager's underlying context, as there is no fixed Deadline for the manager itself
|
||||||
func (g *Manager) Deadline() (deadline time.Time, ok bool) {
|
func (g *Manager) Deadline() (deadline time.Time, ok bool) {
|
||||||
return g.doneCtx.Deadline()
|
return g.managerCtx.Deadline()
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
|
"runtime/pprof"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
@ -29,11 +30,11 @@ type Manager struct {
|
|||||||
shutdownCtx context.Context
|
shutdownCtx context.Context
|
||||||
hammerCtx context.Context
|
hammerCtx context.Context
|
||||||
terminateCtx context.Context
|
terminateCtx context.Context
|
||||||
doneCtx context.Context
|
managerCtx context.Context
|
||||||
shutdownCtxCancel context.CancelFunc
|
shutdownCtxCancel context.CancelFunc
|
||||||
hammerCtxCancel context.CancelFunc
|
hammerCtxCancel context.CancelFunc
|
||||||
terminateCtxCancel context.CancelFunc
|
terminateCtxCancel context.CancelFunc
|
||||||
doneCtxCancel context.CancelFunc
|
managerCtxCancel context.CancelFunc
|
||||||
runningServerWaitGroup sync.WaitGroup
|
runningServerWaitGroup sync.WaitGroup
|
||||||
createServerWaitGroup sync.WaitGroup
|
createServerWaitGroup sync.WaitGroup
|
||||||
terminateWaitGroup sync.WaitGroup
|
terminateWaitGroup sync.WaitGroup
|
||||||
@ -58,7 +59,17 @@ func (g *Manager) start(ctx context.Context) {
|
|||||||
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx)
|
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx)
|
||||||
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(ctx)
|
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(ctx)
|
||||||
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(ctx)
|
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(ctx)
|
||||||
g.doneCtx, g.doneCtxCancel = context.WithCancel(ctx)
|
g.managerCtx, g.managerCtxCancel = context.WithCancel(ctx)
|
||||||
|
|
||||||
|
// Next add pprof labels to these contexts
|
||||||
|
g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate"))
|
||||||
|
g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown"))
|
||||||
|
g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer"))
|
||||||
|
g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager"))
|
||||||
|
|
||||||
|
// Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager
|
||||||
|
pprof.SetGoroutineLabels(g.managerCtx)
|
||||||
|
defer pprof.SetGoroutineLabels(ctx)
|
||||||
|
|
||||||
// Set the running state & handle signals
|
// Set the running state & handle signals
|
||||||
g.setState(stateRunning)
|
g.setState(stateRunning)
|
||||||
|
@ -11,6 +11,7 @@ package graceful
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime/pprof"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -40,11 +41,11 @@ type Manager struct {
|
|||||||
shutdownCtx context.Context
|
shutdownCtx context.Context
|
||||||
hammerCtx context.Context
|
hammerCtx context.Context
|
||||||
terminateCtx context.Context
|
terminateCtx context.Context
|
||||||
doneCtx context.Context
|
managerCtx context.Context
|
||||||
shutdownCtxCancel context.CancelFunc
|
shutdownCtxCancel context.CancelFunc
|
||||||
hammerCtxCancel context.CancelFunc
|
hammerCtxCancel context.CancelFunc
|
||||||
terminateCtxCancel context.CancelFunc
|
terminateCtxCancel context.CancelFunc
|
||||||
doneCtxCancel context.CancelFunc
|
managerCtxCancel context.CancelFunc
|
||||||
runningServerWaitGroup sync.WaitGroup
|
runningServerWaitGroup sync.WaitGroup
|
||||||
createServerWaitGroup sync.WaitGroup
|
createServerWaitGroup sync.WaitGroup
|
||||||
terminateWaitGroup sync.WaitGroup
|
terminateWaitGroup sync.WaitGroup
|
||||||
@ -71,7 +72,17 @@ func (g *Manager) start() {
|
|||||||
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(g.ctx)
|
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(g.ctx)
|
||||||
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(g.ctx)
|
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(g.ctx)
|
||||||
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(g.ctx)
|
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(g.ctx)
|
||||||
g.doneCtx, g.doneCtxCancel = context.WithCancel(g.ctx)
|
g.managerCtx, g.managerCtxCancel = context.WithCancel(g.ctx)
|
||||||
|
|
||||||
|
// Next add pprof labels to these contexts
|
||||||
|
g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate"))
|
||||||
|
g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown"))
|
||||||
|
g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer"))
|
||||||
|
g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager"))
|
||||||
|
|
||||||
|
// Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager
|
||||||
|
pprof.SetGoroutineLabels(g.managerCtx)
|
||||||
|
defer pprof.SetGoroutineLabels(g.ctx)
|
||||||
|
|
||||||
// Make channels
|
// Make channels
|
||||||
g.shutdownRequested = make(chan struct{})
|
g.shutdownRequested = make(chan struct{})
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"runtime/pprof"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
@ -66,11 +67,9 @@ func GetManager() *Manager {
|
|||||||
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
||||||
// process table.
|
// process table.
|
||||||
func (pm *Manager) AddContext(parent context.Context, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
func (pm *Manager) AddContext(parent context.Context, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
||||||
parentPID := GetParentPID(parent)
|
|
||||||
|
|
||||||
ctx, cancel = context.WithCancel(parent)
|
ctx, cancel = context.WithCancel(parent)
|
||||||
|
|
||||||
pid, finished := pm.Add(parentPID, description, cancel)
|
ctx, pid, finished := pm.Add(ctx, description, cancel)
|
||||||
|
|
||||||
return &Context{
|
return &Context{
|
||||||
Context: ctx,
|
Context: ctx,
|
||||||
@ -87,11 +86,9 @@ func (pm *Manager) AddContext(parent context.Context, description string) (ctx c
|
|||||||
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
||||||
// process table.
|
// process table.
|
||||||
func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finshed FinishedFunc) {
|
func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finshed FinishedFunc) {
|
||||||
parentPID := GetParentPID(parent)
|
|
||||||
|
|
||||||
ctx, cancel = context.WithTimeout(parent, timeout)
|
ctx, cancel = context.WithTimeout(parent, timeout)
|
||||||
|
|
||||||
pid, finshed := pm.Add(parentPID, description, cancel)
|
ctx, pid, finshed := pm.Add(ctx, description, cancel)
|
||||||
|
|
||||||
return &Context{
|
return &Context{
|
||||||
Context: ctx,
|
Context: ctx,
|
||||||
@ -100,7 +97,9 @@ func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Durati
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add creates a new process
|
// Add creates a new process
|
||||||
func (pm *Manager) Add(parentPID IDType, description string, cancel context.CancelFunc) (IDType, FinishedFunc) {
|
func (pm *Manager) Add(ctx context.Context, description string, cancel context.CancelFunc) (context.Context, IDType, FinishedFunc) {
|
||||||
|
parentPID := GetParentPID(ctx)
|
||||||
|
|
||||||
pm.mutex.Lock()
|
pm.mutex.Lock()
|
||||||
start, pid := pm.nextPID()
|
start, pid := pm.nextPID()
|
||||||
|
|
||||||
@ -120,6 +119,7 @@ func (pm *Manager) Add(parentPID IDType, description string, cancel context.Canc
|
|||||||
finished := func() {
|
finished := func() {
|
||||||
cancel()
|
cancel()
|
||||||
pm.remove(process)
|
pm.remove(process)
|
||||||
|
pprof.SetGoroutineLabels(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
if parent != nil {
|
if parent != nil {
|
||||||
@ -128,7 +128,10 @@ func (pm *Manager) Add(parentPID IDType, description string, cancel context.Canc
|
|||||||
pm.processes[pid] = process
|
pm.processes[pid] = process
|
||||||
pm.mutex.Unlock()
|
pm.mutex.Unlock()
|
||||||
|
|
||||||
return pid, finished
|
pprofCtx := pprof.WithLabels(ctx, pprof.Labels("process-description", description, "ppid", string(parentPID), "pid", string(pid)))
|
||||||
|
pprof.SetGoroutineLabels(pprofCtx)
|
||||||
|
|
||||||
|
return pprofCtx, pid, finished
|
||||||
}
|
}
|
||||||
|
|
||||||
// nextPID will return the next available PID. pm.mutex should already be locked.
|
// nextPID will return the next available PID. pm.mutex should already be locked.
|
||||||
|
Loading…
Reference in New Issue
Block a user