7dc100714d
This PR adds counter metrics for the CPU system and the Geth process. Currently the only metrics available for these items are gauges. Gauges are fine when the consumer scrapes metrics data at the same interval as Geth produces new values (every 3 seconds), but it is likely that most consumers will not scrape that often. Intervals of 10, 15, or maybe even 30 seconds are probably more common. So the problem is, how does the consumer estimate what the CPU was doing in between scrapes. With a counter, it's easy ... you just subtract two successive values and divide by the time to get a nice, accurate average. But with a gauge, you can't do that. A gauge reading is an instantaneous picture of what was happening at that moment, but it gives you no idea about what was going on between scrapes. Taking an average of values is meaningless.
248 lines
7.9 KiB
Go
248 lines
7.9 KiB
Go
package librato
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"math"
|
|
"regexp"
|
|
"time"
|
|
|
|
"github.com/ethereum/go-ethereum/metrics"
|
|
)
|
|
|
|
// a regexp for extracting the unit from time.Duration.String
|
|
var unitRegexp = regexp.MustCompile(`[^\\d]+$`)
|
|
|
|
// a helper that turns a time.Duration into librato display attributes for timer metrics
|
|
func translateTimerAttributes(d time.Duration) (attrs map[string]interface{}) {
|
|
attrs = make(map[string]interface{})
|
|
attrs[DisplayTransform] = fmt.Sprintf("x/%d", int64(d))
|
|
attrs[DisplayUnitsShort] = string(unitRegexp.Find([]byte(d.String())))
|
|
return
|
|
}
|
|
|
|
type Reporter struct {
|
|
Email, Token string
|
|
Namespace string
|
|
Source string
|
|
Interval time.Duration
|
|
Registry metrics.Registry
|
|
Percentiles []float64 // percentiles to report on histogram metrics
|
|
TimerAttributes map[string]interface{} // units in which timers will be displayed
|
|
intervalSec int64
|
|
}
|
|
|
|
func NewReporter(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) *Reporter {
|
|
return &Reporter{e, t, "", s, d, r, p, translateTimerAttributes(u), int64(d / time.Second)}
|
|
}
|
|
|
|
func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) {
|
|
NewReporter(r, d, e, t, s, p, u).Run()
|
|
}
|
|
|
|
func (rep *Reporter) Run() {
|
|
log.Printf("WARNING: This client has been DEPRECATED! It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015")
|
|
ticker := time.NewTicker(rep.Interval)
|
|
defer ticker.Stop()
|
|
metricsApi := &LibratoClient{rep.Email, rep.Token}
|
|
for now := range ticker.C {
|
|
var metrics Batch
|
|
var err error
|
|
if metrics, err = rep.BuildRequest(now, rep.Registry); err != nil {
|
|
log.Printf("ERROR constructing librato request body %s", err)
|
|
continue
|
|
}
|
|
if err := metricsApi.PostMetrics(metrics); err != nil {
|
|
log.Printf("ERROR sending metrics to librato %s", err)
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculate sum of squares from data provided by metrics.Histogram
|
|
// see http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
|
|
func sumSquares(s metrics.Sample) float64 {
|
|
count := float64(s.Count())
|
|
sumSquared := math.Pow(count*s.Mean(), 2)
|
|
sumSquares := math.Pow(count*s.StdDev(), 2) + sumSquared/count
|
|
if math.IsNaN(sumSquares) {
|
|
return 0.0
|
|
}
|
|
return sumSquares
|
|
}
|
|
func sumSquaresTimer(t metrics.Timer) float64 {
|
|
count := float64(t.Count())
|
|
sumSquared := math.Pow(count*t.Mean(), 2)
|
|
sumSquares := math.Pow(count*t.StdDev(), 2) + sumSquared/count
|
|
if math.IsNaN(sumSquares) {
|
|
return 0.0
|
|
}
|
|
return sumSquares
|
|
}
|
|
|
|
func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) {
|
|
snapshot = Batch{
|
|
// coerce timestamps to a stepping fn so that they line up in Librato graphs
|
|
MeasureTime: (now.Unix() / rep.intervalSec) * rep.intervalSec,
|
|
Source: rep.Source,
|
|
}
|
|
snapshot.Gauges = make([]Measurement, 0)
|
|
snapshot.Counters = make([]Measurement, 0)
|
|
histogramGaugeCount := 1 + len(rep.Percentiles)
|
|
r.Each(func(name string, metric interface{}) {
|
|
if rep.Namespace != "" {
|
|
name = fmt.Sprintf("%s.%s", rep.Namespace, name)
|
|
}
|
|
measurement := Measurement{}
|
|
measurement[Period] = rep.Interval.Seconds()
|
|
switch m := metric.(type) {
|
|
case metrics.Counter:
|
|
if m.Count() > 0 {
|
|
measurement[Name] = fmt.Sprintf("%s.%s", name, "count")
|
|
measurement[Value] = float64(m.Count())
|
|
measurement[Attributes] = map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
}
|
|
snapshot.Counters = append(snapshot.Counters, measurement)
|
|
}
|
|
case metrics.CounterFloat64:
|
|
if m.Count() > 0 {
|
|
measurement[Name] = fmt.Sprintf("%s.%s", name, "count")
|
|
measurement[Value] = m.Count()
|
|
measurement[Attributes] = map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
}
|
|
snapshot.Counters = append(snapshot.Counters, measurement)
|
|
}
|
|
case metrics.Gauge:
|
|
measurement[Name] = name
|
|
measurement[Value] = float64(m.Value())
|
|
snapshot.Gauges = append(snapshot.Gauges, measurement)
|
|
case metrics.GaugeFloat64:
|
|
measurement[Name] = name
|
|
measurement[Value] = m.Value()
|
|
snapshot.Gauges = append(snapshot.Gauges, measurement)
|
|
case metrics.Histogram:
|
|
if m.Count() > 0 {
|
|
gauges := make([]Measurement, histogramGaugeCount)
|
|
s := m.Sample()
|
|
measurement[Name] = fmt.Sprintf("%s.%s", name, "hist")
|
|
measurement[Count] = uint64(s.Count())
|
|
measurement[Max] = float64(s.Max())
|
|
measurement[Min] = float64(s.Min())
|
|
measurement[Sum] = float64(s.Sum())
|
|
measurement[SumSquares] = sumSquares(s)
|
|
gauges[0] = measurement
|
|
for i, p := range rep.Percentiles {
|
|
gauges[i+1] = Measurement{
|
|
Name: fmt.Sprintf("%s.%.2f", measurement[Name], p),
|
|
Value: s.Percentile(p),
|
|
Period: measurement[Period],
|
|
}
|
|
}
|
|
snapshot.Gauges = append(snapshot.Gauges, gauges...)
|
|
}
|
|
case metrics.Meter:
|
|
measurement[Name] = name
|
|
measurement[Value] = float64(m.Count())
|
|
snapshot.Counters = append(snapshot.Counters, measurement)
|
|
snapshot.Gauges = append(snapshot.Gauges,
|
|
Measurement{
|
|
Name: fmt.Sprintf("%s.%s", name, "1min"),
|
|
Value: m.Rate1(),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
},
|
|
},
|
|
Measurement{
|
|
Name: fmt.Sprintf("%s.%s", name, "5min"),
|
|
Value: m.Rate5(),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
},
|
|
},
|
|
Measurement{
|
|
Name: fmt.Sprintf("%s.%s", name, "15min"),
|
|
Value: m.Rate15(),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
},
|
|
},
|
|
)
|
|
case metrics.Timer:
|
|
measurement[Name] = name
|
|
measurement[Value] = float64(m.Count())
|
|
snapshot.Counters = append(snapshot.Counters, measurement)
|
|
if m.Count() > 0 {
|
|
libratoName := fmt.Sprintf("%s.%s", name, "timer.mean")
|
|
gauges := make([]Measurement, histogramGaugeCount)
|
|
gauges[0] = Measurement{
|
|
Name: libratoName,
|
|
Count: uint64(m.Count()),
|
|
Sum: m.Mean() * float64(m.Count()),
|
|
Max: float64(m.Max()),
|
|
Min: float64(m.Min()),
|
|
SumSquares: sumSquaresTimer(m),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: rep.TimerAttributes,
|
|
}
|
|
for i, p := range rep.Percentiles {
|
|
gauges[i+1] = Measurement{
|
|
Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100),
|
|
Value: m.Percentile(p),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: rep.TimerAttributes,
|
|
}
|
|
}
|
|
snapshot.Gauges = append(snapshot.Gauges, gauges...)
|
|
snapshot.Gauges = append(snapshot.Gauges,
|
|
Measurement{
|
|
Name: fmt.Sprintf("%s.%s", name, "rate.1min"),
|
|
Value: m.Rate1(),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
},
|
|
},
|
|
Measurement{
|
|
Name: fmt.Sprintf("%s.%s", name, "rate.5min"),
|
|
Value: m.Rate5(),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
},
|
|
},
|
|
Measurement{
|
|
Name: fmt.Sprintf("%s.%s", name, "rate.15min"),
|
|
Value: m.Rate15(),
|
|
Period: int64(rep.Interval.Seconds()),
|
|
Attributes: map[string]interface{}{
|
|
DisplayUnitsLong: Operations,
|
|
DisplayUnitsShort: OperationsShort,
|
|
DisplayMin: "0",
|
|
},
|
|
},
|
|
)
|
|
}
|
|
}
|
|
})
|
|
return
|
|
}
|