c539bda166
This changes how we read performance metrics from the Go runtime. Instead of using runtime.ReadMemStats, we now rely on the API provided by package runtime/metrics. runtime/metrics provides more accurate information. For example, the new interface has better reporting of memory use. In my testing, the reported value of held memory more accurately reflects the usage reported by the OS. The semantics of metrics system/memory/allocs and system/memory/frees have changed to report amounts in bytes. ReadMemStats only reported the count of allocations in number-of-objects. This is imprecise: 'tiny objects' are not counted because the runtime allocates them in batches; and certain improvements in allocation behavior, such as struct size optimizations, will be less visible when the number of allocs doesn't change. Changing allocation reports to be in bytes makes it appear in graphs that lots more is being allocated. I don't think that's a problem because this metric is primarily interesting for geth developers. The metric system/memory/pauses has been changed to report statistical values from the histogram provided by the runtime. Its name in influxdb has changed from geth.system/memory/pauses.meter to geth.system/memory/pauses.histogram. We also have a new histogram metric, system/cpu/schedlatency, reporting the Go scheduler latency.
205 lines
7.5 KiB
Go
205 lines
7.5 KiB
Go
// Go port of Coda Hale's Metrics library
|
|
//
|
|
// <https://github.com/rcrowley/go-metrics>
|
|
//
|
|
// Coda Hale's original work: <https://github.com/codahale/metrics>
|
|
package metrics
|
|
|
|
import (
|
|
"os"
|
|
"runtime/metrics"
|
|
"runtime/pprof"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/ethereum/go-ethereum/log"
|
|
)
|
|
|
|
// Enabled is checked by the constructor functions for all of the
// standard metrics. If it is false, the metric returned is a stub.
//
// This global kill-switch helps quantify the observer effect and makes
// for less cluttered pprof profiles. It starts out false and is switched
// on by init when one of enablerFlags is found on the command line.
var Enabled = false

// EnabledExpensive is a soft-flag meant for external packages to check if costly
// metrics gathering is allowed or not. The goal is to separate standard metrics
// for health monitoring and debug metrics that might impact runtime performance.
// It starts out false and is switched on by init when one of
// expensiveEnablerFlags is found on the command line.
var EnabledExpensive = false

// enablerFlags lists the CLI flag names (without leading dashes) that enable
// metrics collection.
var enablerFlags = []string{"metrics"}

// expensiveEnablerFlags lists the CLI flag names (without leading dashes) that
// enable expensive metrics collection.
var expensiveEnablerFlags = []string{"metrics.expensive"}
|
|
|
|
// Init enables or disables the metrics system. Since we need this to run before
|
|
// any other code gets to create meters and timers, we'll actually do an ugly hack
|
|
// and peek into the command line args for the metrics flag.
|
|
func init() {
|
|
for _, arg := range os.Args {
|
|
flag := strings.TrimLeft(arg, "-")
|
|
|
|
for _, enabler := range enablerFlags {
|
|
if !Enabled && flag == enabler {
|
|
log.Info("Enabling metrics collection")
|
|
Enabled = true
|
|
}
|
|
}
|
|
for _, enabler := range expensiveEnablerFlags {
|
|
if !EnabledExpensive && flag == enabler {
|
|
log.Info("Enabling expensive metrics collection")
|
|
EnabledExpensive = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// threadCreateProfile is the pprof "threadcreate" profile, looked up once at
// package initialization. CollectProcessMetrics reports its Count through the
// system/cpu/threads gauge.
var threadCreateProfile = pprof.Lookup("threadcreate")
|
|
|
|
// runtimeStats is one snapshot of the Go runtime metrics that
// readRuntimeStats extracts from runtimeSamples. Each field is documented
// with the name of the runtime/metrics sample it is filled from.
type runtimeStats struct {
	// GCPauses is filled from "/gc/pauses:seconds".
	GCPauses *metrics.Float64Histogram
	// GCAllocBytes is filled from "/gc/heap/allocs:bytes".
	GCAllocBytes uint64
	// GCFreedBytes is filled from "/gc/heap/frees:bytes".
	GCFreedBytes uint64

	// MemTotal is filled from "/memory/classes/total:bytes".
	MemTotal uint64
	// HeapObjects is filled from "/memory/classes/heap/objects:bytes".
	HeapObjects uint64
	// HeapFree is filled from "/memory/classes/heap/free:bytes".
	HeapFree uint64
	// HeapReleased is filled from "/memory/classes/heap/released:bytes".
	HeapReleased uint64
	// HeapUnused is filled from "/memory/classes/heap/unused:bytes".
	HeapUnused uint64

	// Goroutines is filled from "/sched/goroutines:goroutines".
	Goroutines uint64
	// SchedLatency is filled from "/sched/latencies:seconds".
	SchedLatency *metrics.Float64Histogram
}
|
|
|
|
// runtimeSamples is the sample buffer that readRuntimeStats hands to
// metrics.Read on every call. Only the sample names are set up here; the
// values are left at their zero state.
var runtimeSamples = func() []metrics.Sample {
	names := [...]string{
		"/gc/pauses:seconds", // histogram
		"/gc/heap/allocs:bytes",
		"/gc/heap/frees:bytes",
		"/memory/classes/total:bytes",
		"/memory/classes/heap/objects:bytes",
		"/memory/classes/heap/free:bytes",
		"/memory/classes/heap/released:bytes",
		"/memory/classes/heap/unused:bytes",
		"/sched/goroutines:goroutines",
		"/sched/latencies:seconds", // histogram
	}
	samples := make([]metrics.Sample, len(names))
	for i, name := range names {
		samples[i].Name = name
	}
	return samples
}()
|
|
|
|
func readRuntimeStats(v *runtimeStats) {
|
|
metrics.Read(runtimeSamples)
|
|
for _, s := range runtimeSamples {
|
|
// Skip invalid/unknown metrics. This is needed because some metrics
|
|
// are unavailable in older Go versions, and attempting to read a 'bad'
|
|
// metric panics.
|
|
if s.Value.Kind() == metrics.KindBad {
|
|
continue
|
|
}
|
|
|
|
switch s.Name {
|
|
case "/gc/pauses:seconds":
|
|
v.GCPauses = s.Value.Float64Histogram()
|
|
case "/gc/heap/allocs:bytes":
|
|
v.GCAllocBytes = s.Value.Uint64()
|
|
case "/gc/heap/frees:bytes":
|
|
v.GCFreedBytes = s.Value.Uint64()
|
|
case "/memory/classes/total:bytes":
|
|
v.MemTotal = s.Value.Uint64()
|
|
case "/memory/classes/heap/objects:bytes":
|
|
v.HeapObjects = s.Value.Uint64()
|
|
case "/memory/classes/heap/free:bytes":
|
|
v.HeapFree = s.Value.Uint64()
|
|
case "/memory/classes/heap/released:bytes":
|
|
v.HeapReleased = s.Value.Uint64()
|
|
case "/memory/classes/heap/unused:bytes":
|
|
v.HeapUnused = s.Value.Uint64()
|
|
case "/sched/goroutines:goroutines":
|
|
v.Goroutines = s.Value.Uint64()
|
|
case "/sched/latencies:seconds":
|
|
v.SchedLatency = s.Value.Float64Histogram()
|
|
}
|
|
}
|
|
}
|
|
|
|
// CollectProcessMetrics periodically collects various metrics about the running process.
|
|
func CollectProcessMetrics(refresh time.Duration) {
|
|
// Short circuit if the metrics system is disabled
|
|
if !Enabled {
|
|
return
|
|
}
|
|
|
|
refreshFreq := int64(refresh / time.Second)
|
|
|
|
// Create the various data collectors
|
|
var (
|
|
cpustats = make([]CPUStats, 2)
|
|
diskstats = make([]DiskStats, 2)
|
|
rstats = make([]runtimeStats, 2)
|
|
)
|
|
|
|
// This scale factor is used for the runtime's time metrics. It's useful to convert to
|
|
// ns here because the runtime gives times in float seconds, but runtimeHistogram can
|
|
// only provide integers for the minimum and maximum values.
|
|
const secondsToNs = float64(time.Second)
|
|
|
|
// Define the various metrics to collect
|
|
var (
|
|
cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
|
|
cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry)
|
|
cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
|
|
cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
|
|
cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
|
|
cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil)
|
|
memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil)
|
|
memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry)
|
|
memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry)
|
|
memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
|
|
heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
|
|
heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry)
|
|
diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry)
|
|
diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry)
|
|
diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry)
|
|
diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry)
|
|
diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry)
|
|
diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry)
|
|
)
|
|
|
|
// Iterate loading the different stats and updating the meters.
|
|
now, prev := 0, 1
|
|
for ; ; now, prev = prev, now {
|
|
// CPU
|
|
ReadCPUStats(&cpustats[now])
|
|
cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq)
|
|
cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq)
|
|
cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq)
|
|
|
|
// Threads
|
|
cpuThreads.Update(int64(threadCreateProfile.Count()))
|
|
|
|
// Go runtime metrics
|
|
readRuntimeStats(&rstats[now])
|
|
|
|
cpuGoroutines.Update(int64(rstats[now].Goroutines))
|
|
cpuSchedLatency.update(rstats[now].SchedLatency)
|
|
memPauses.update(rstats[now].GCPauses)
|
|
|
|
memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes))
|
|
memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes))
|
|
|
|
memTotal.Update(int64(rstats[now].MemTotal))
|
|
heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased))
|
|
heapObjects.Update(int64(rstats[now].HeapObjects))
|
|
|
|
// Disk
|
|
if ReadDiskStats(&diskstats[now]) == nil {
|
|
diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount)
|
|
diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
|
|
diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount)
|
|
diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
|
|
diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
|
|
diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
|
|
}
|
|
|
|
time.Sleep(refresh)
|
|
}
|
|
}
|