Add a laconicd Grafana dashboard to monitoring stack (#799)

Part of https://www.notion.so/Monitoring-and-alerting-for-laconicd-86727c3b4dde4dc993d87d6e29f935fe

- Add a laconicd Grafana dashboard
  - Update fixturenet-laconicd script to expose metrics
- Upgrade Grafana version to avoid errors while saving changes made to a dashboard (see [thread](https://community.grafana.com/t/error-cannot-add-property-ishandled-object-is-not-extensible/109268))
- Add an alert rule for the Ajna watcher

Reviewed-on: cerc-io/stack-orchestrator#799
Co-authored-by: Prathamesh Musale <prathamesh.musale0@gmail.com>
Co-committed-by: Prathamesh Musale <prathamesh.musale0@gmail.com>
This commit is contained in:
Prathamesh Musale 2024-04-11 07:59:36 +00:00 committed by ashwin
parent 87fffca358
commit 345d200873
7 changed files with 1198 additions and 1 deletions

View File

@ -2,7 +2,7 @@ version: "3.7"
services:
grafana:
image: grafana/grafana:10.2.2
image: grafana/grafana:10.2.3
restart: always
environment:
GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL}

View File

@ -102,6 +102,17 @@ if [ "$1" == "clean" ] || [ ! -d "$HOME/.laconicd/data/blockstore.db" ]; then
fi
fi
# Enable telemetry (prometheus metrics: http://localhost:1317/metrics?format=prometheus)
# The in-place flag is the only thing that differs between platforms: the macOS
# branch passes an explicit empty backup suffix after -i, the other branch does not.
if [[ "$OSTYPE" == "darwin"* ]]; then
  sed_in_place=(sed -i '')
else
  sed_in_place=(sed -i)
fi
# Flip `enabled` only between the [telemetry] header and the next table header,
# so an unrelated `enabled = false` elsewhere in app.toml is left untouched.
# Paths are quoted so a $HOME containing whitespace does not split the argument.
"${sed_in_place[@]}" \
  -e '/^\[telemetry\]/,/^\[/ s/enabled = false/enabled = true/g' \
  -e 's/prometheus-retention-time = 0/prometheus-retention-time = 60/g' \
  "$HOME/.laconicd/config/app.toml"
"${sed_in_place[@]}" 's/prometheus = false/prometheus = true/g' "$HOME/.laconicd/config/config.toml"
# Allocate genesis accounts (cosmos formatted addresses)
laconicd add-genesis-account $KEY 100000000000000000000000000aphoton --keyring-backend $KEYRING

View File

@ -65,3 +65,12 @@ scrape_configs:
target_label: instance
- target_label: __address__
replacement: postgres-exporter:9187
# Scrape job for laconicd: requests /metrics with the query parameter
# format=prometheus from each configured target every 30s.
- job_name: laconicd
metrics_path: /metrics
scrape_interval: 30s
static_configs:
# No target is active by default; uncomment and edit the example below.
# Add laconicd REST endpoint target with host and port (1317)
# - targets: ['example-host:1317']
# URL query parameters appended to every scrape request
params:
format: ['prometheus']

View File

@ -771,3 +771,81 @@ groups:
annotations:
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
isPaused: false
# Ajna
# Alert rule for the Ajna watcher. The `condition` expression checks whether the
# `diff` query result is >= 16; the summary annotation reports that value as how
# far the watcher is behind the external head.
- uid: ajna_diff_external
title: ajna_watcher_head_tracking
# refId of the data entry (below) whose result decides the alert
condition: condition
data:
# Query `diff`: latest_block_number minus the ajna watcher's
# sync_status_block_number (kind="latest_indexed"), matched on the `chain` label.
- refId: diff
relativeTimeRange:
from: 600
to: 0
datasourceUid: PBFA97CFB590B2093
model:
datasource:
type: prometheus
uid: PBFA97CFB590B2093
disableTextWrap: false
editorMode: code
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="ajna", instance="ajna", kind="latest_indexed"}
fullMetaSearch: false
includeNullMetadata: true
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: diff
useBackend: false
# Query `latest_external`: latest_block_number for chain="filecoin".
# It is not referenced by the `condition` expression below.
- refId: latest_external
relativeTimeRange:
from: 600
to: 0
datasourceUid: PBFA97CFB590B2093
model:
datasource:
type: prometheus
uid: PBFA97CFB590B2093
editorMode: code
expr: latest_block_number{chain="filecoin"}
hide: false
instant: true
legendFormat: __auto
range: false
refId: latest_external
# Expression `condition`: math expression over the `diff` query result.
- refId: condition
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
# NOTE(review): this `conditions` list looks like classic-condition settings;
# with `type: math` the `expression` field below appears to be what is
# evaluated -- confirm before relying on these values.
conditions:
- evaluator:
params:
- 0
- 0
type: gt
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
# Threshold: true when the `diff` result is 16 or more
expression: ${diff} >= 16
intervalMs: 1000
maxDataPoints: 43200
refId: condition
type: math
# Missing data and query execution errors are both treated as alerting
noDataState: Alerting
execErrState: Alerting
# Pending period before the rule fires
for: 15m
annotations:
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
isPaused: false

View File

@ -4,6 +4,7 @@
* Comes with the following built-in exporters / dashboards:
* Chain Head Exporter - for tracking chain heads given external ETH RPC endpoints
* Watchers dashboard
* laconicd dashboard
* [Prometheus Blackbox](https://grafana.com/grafana/dashboards/7587-prometheus-blackbox-exporter/) - for tracking HTTP endpoints
* [NodeJS Application Dashboard](https://grafana.com/grafana/dashboards/11159-nodejs-application-dashboard/) - for default NodeJS metrics
* [PostgreSQL Database](https://grafana.com/grafana/dashboards/9628-postgresql-database/) - for monitoring Postgres dbs
@ -99,6 +100,7 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-spec.yml --de
- targets:
- <HTTP_ENDPOINT_1>
- <HTTP_ENDPOINT_2>
- <LACONICD_GQL_ENDPOINT>
```
* Postgres (in-stack exporter):
@ -116,6 +118,16 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-spec.yml --de
```
* Add database credentials to be used in `auth_modules` in the postgres-exporter config file (`monitoring-deployment/config/monitoring/postgres-exporter.yml`)
* laconicd: update the `laconicd` job with a laconicd node's REST endpoint host and port:
```yml
...
- job_name: laconicd
static_configs:
- targets: ['example-host:1317']
...
```
Note: Use `host.docker.internal` as host to access ports on the host machine
### Grafana Config

View File

@ -46,6 +46,11 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
static_configs:
- targets:
- <AZIMUTH_GATEWAY_GQL_ENDPOINT>
- <LACONICD_GQL_ENDPOINT>
...
- job_name: laconicd
static_configs:
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
...
- job_name: azimuth
scrape_interval: 10s
@ -98,6 +103,16 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
labels:
instance: 'merkl_sushiswap'
chain: 'filecoin'
- job_name: ajna
scrape_interval: 20s
metrics_path: /metrics
scheme: http
static_configs:
- targets: ['AJNA_WATCHER_HOST:AJNA_WATCHER_PORT']
labels:
instance: 'ajna'
chain: 'filecoin'
```
To include any additional watcher in the Watchers dashboard, add a scrape config for it as shown above.