Add alerts for graph-node subgraphs #821
@ -6,10 +6,16 @@ services:
|
||||
restart: always
|
||||
environment:
|
||||
GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL}
|
||||
CERC_GRAFANA_ALERTS_SUBGRAPH_IDS: ${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS}
|
||||
volumes:
|
||||
- ../config/monitoring/grafana/provisioning:/etc/grafana/provisioning
|
||||
- ../config/monitoring/grafana/dashboards:/etc/grafana/dashboards
|
||||
- ../config/monitoring/update-grafana-alerts-config.sh:/update-grafana-alerts-config.sh
|
||||
- grafana_storage:/var/lib/grafana
|
||||
user: root
|
||||
entrypoint: ["bash", "-c"]
|
||||
command: |
|
||||
"/update-grafana-alerts-config.sh && /run.sh"
|
||||
ports:
|
||||
- "3000"
|
||||
healthcheck:
|
||||
|
@ -0,0 +1,64 @@
|
||||
# Grafana alert rule provisioning for graph-node subgraph deployments.
# Defines a single rule that fires when a deployment's indexed head falls
# behind the chain head reported for the same network.
apiVersion: 1
groups:
  - orgId: 1
    name: subgraph
    # Folder name is what notification policy routes match on
    # (grafana_folder label).
    folder: SubgraphAlerts
    # Evaluation interval for every rule in this group.
    interval: 30s
    rules:
      - uid: b2a9144b-6104-46fc-92b5-352f4e643c4c
        title: subgraph_head_tracking
        # refId of the data entry whose result decides the alert state.
        condition: condition
        data:
          # Query: number of blocks each selected deployment is behind head.
          - refId: diff
            relativeTimeRange:
              from: 600
              to: 0
            # NOTE(review): presumably the uid of the provisioned Prometheus
            # datasource - confirm against the datasource provisioning config.
            datasourceUid: PBFA97CFB590B2093
            model:
              datasource:
                type: prometheus
                uid: PBFA97CFB590B2093
              editorMode: code
              # REPLACE_WITH_SUBGRAPH_IDS is a placeholder that
              # update-grafana-alerts-config.sh substitutes with the value of
              # CERC_GRAFANA_ALERTS_SUBGRAPH_IDS (ids separated by "|", used
              # here as a regex alternation).
              expr: ethereum_chain_head_number - on(network) group_right deployment_head{deployment=~"REPLACE_WITH_SUBGRAPH_IDS"}
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: diff
          # Expression: threshold check on the result of "diff".
          - refId: condition
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    # Condition is met when diff > 15.
                    # NOTE(review): the second param looks unused for "gt".
                    params:
                      - 15
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: diff
              intervalMs: 1000
              maxDataPoints: 43200
              refId: condition
              type: threshold
        # Missing data is treated as healthy; evaluation errors raise the alert.
        noDataState: OK
        execErrState: Alerting
        # The condition must hold for this long before the alert fires.
        for: 5m
        annotations:
          summary: Subgraph deployment {{ index $labels "deployment" }} is falling behind head by {{ index $values "diff" }}
        labels: {}
        isPaused: false
|
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Substitute the configured subgraph deployment ids into the Grafana subgraph
# alert rules.
#
# Reads: CERC_GRAFANA_ALERTS_SUBGRAPH_IDS (subgraph ids separated by "|")
# Edits: /etc/grafana/provisioning/alerting/subgraph-alert-rules.yml, in place
# Exit status is that of the final sed command.
#
# NOTE(review): the substitution consumes the placeholder in the rules file, so
# a later run with a different value finds nothing left to replace - confirm
# whether the rules file is expected to be re-copied before each start.

echo "Using CERC_GRAFANA_ALERTS_SUBGRAPH_IDS ${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS}"

# Escape the characters that are special inside a sed replacement (backslash,
# the "/" delimiter and "&") so the value is inserted literally.
# Backslashes are handled first so the escapes added afterwards stay intact.
subgraph_ids=${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS//\\/\\\\}
subgraph_ids=${subgraph_ids//\//\\/}
subgraph_ids=${subgraph_ids//&/\\&}

# Replace subgraph ids in subgraph alerting config
# Note: Requires the grafana container to be run with user root
sed -i "s/REPLACE_WITH_SUBGRAPH_IDS/${subgraph_ids}/g" /etc/grafana/provisioning/alerting/subgraph-alert-rules.yml
|
@ -113,21 +113,31 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
|
||||
labels:
|
||||
instance: 'ajna'
|
||||
chain: 'filecoin'
|
||||
|
||||
- job_name: graph-node
|
||||
metrics_path: /metrics
|
||||
scrape_interval: 30s
|
||||
static_configs:
|
||||
- targets: ['GRAPH_NODE_HOST:GRAPH_NODE_HOST_METRICS_PORT']
|
||||
```
|
||||
|
||||
To add another watcher to the Watchers dashboard, add a scrape config for it in the same way as above.
|
||||
|
||||
### Grafana alerts config
|
||||
|
||||
Place the pre-configured watcher alerts rules in Grafana provisioning directory:
|
||||
Place the pre-configured alert rules in the Grafana provisioning directory:
|
||||
|
||||
```bash
|
||||
# watcher alert rules
|
||||
cp monitoring-watchers-deployment/config/monitoring/watcher-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/
|
||||
|
||||
# subgraph alert rules
|
||||
cp monitoring-watchers-deployment/config/monitoring/subgraph-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/
|
||||
```
|
||||
|
||||
Update the alerting contact points config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with the desired contact points.
|
||||
|
||||
Add corresponding routes to the notification policies config (`monitoring-watchers-deployment/monitoring/grafana/provisioning/alerting/policies.yaml`) with appropriate object-matchers:
|
||||
Add corresponding routes to the notification policies config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/policies.yml`) with appropriate object-matchers:
|
||||
|
||||
```yml
|
||||
...
|
||||
@ -135,7 +145,7 @@ Add corresponding routes to the notification policies config (`monitoring-watche
|
||||
- receiver: SlackNotifier
|
||||
object_matchers:
|
||||
# Add matchers below
|
||||
- ['grafana_folder', '=', 'WatcherAlerts']
|
||||
- ['grafana_folder', '=~', 'WatcherAlerts|SubgraphAlerts']
|
||||
```
|
||||
|
||||
### Env
|
||||
@ -149,6 +159,9 @@ Set the following env variables in the deployment env config file (`monitoring-w
|
||||
# Grafana server host URL to be used
|
||||
# (Optional, default: http://localhost:3000)
|
||||
GF_SERVER_ROOT_URL=
|
||||
|
||||
# List of subgraph ids to configure alerts for (separated by |)
|
||||
CERC_GRAFANA_ALERTS_SUBGRAPH_IDS=
|
||||
```
|
||||
|
||||
## Start the stack
|
||||
|
Loading…
Reference in New Issue
Block a user