Add alerts for graph-node subgraphs #821
@ -6,10 +6,16 @@ services:
|
|||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL}
|
GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL}
|
||||||
|
CERC_GRAFANA_ALERTS_SUBGRAPH_IDS: ${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS}
|
||||||
volumes:
|
volumes:
|
||||||
- ../config/monitoring/grafana/provisioning:/etc/grafana/provisioning
|
- ../config/monitoring/grafana/provisioning:/etc/grafana/provisioning
|
||||||
- ../config/monitoring/grafana/dashboards:/etc/grafana/dashboards
|
- ../config/monitoring/grafana/dashboards:/etc/grafana/dashboards
|
||||||
|
- ../config/monitoring/update-grafana-alerts-config.sh:/update-grafana-alerts-config.sh
|
||||||
- grafana_storage:/var/lib/grafana
|
- grafana_storage:/var/lib/grafana
|
||||||
|
user: root
|
||||||
|
entrypoint: ["bash", "-c"]
|
||||||
|
command: |
|
||||||
|
"/update-grafana-alerts-config.sh && /run.sh"
|
||||||
ports:
|
ports:
|
||||||
- "3000"
|
- "3000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
@ -0,0 +1,64 @@
|
|||||||
|
---
# Grafana alert rule provisioning: subgraph head tracking.
# The REPLACE_WITH_SUBGRAPH_IDS placeholder in the query below is substituted
# at container start by update-grafana-alerts-config.sh.
apiVersion: 1

groups:
  - orgId: 1
    name: 'subgraph'
    folder: 'SubgraphAlerts'
    interval: '30s'
    rules:
      - uid: 'b2a9144b-6104-46fc-92b5-352f4e643c4c'
        title: 'subgraph_head_tracking'
        # refId of the data entry whose result decides the alert state
        condition: 'condition'
        data:
          # Query "diff": chain head number minus deployment head, joined on
          # the network label, for the selected deployments
          - refId: 'diff'
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: 'PBFA97CFB590B2093'
            model:
              datasource:
                type: 'prometheus'
                uid: 'PBFA97CFB590B2093'
              editorMode: 'code'
              expr: 'ethereum_chain_head_number - on(network) group_right deployment_head{deployment=~"REPLACE_WITH_SUBGRAPH_IDS"}'
              instant: true
              intervalMs: 1000
              legendFormat: '__auto'
              maxDataPoints: 43200
              range: false
              refId: 'diff'
          # Expression "condition": threshold (gt 15) applied to "diff"
          - refId: 'condition'
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: '__expr__'
            model:
              conditions:
                - evaluator:
                    params: [15, 0]
                    type: 'gt'
                  operator:
                    type: 'and'
                  query:
                    params: []
                  reducer:
                    params: []
                    type: 'avg'
                  type: 'query'
              datasource:
                name: 'Expression'
                type: '__expr__'
                uid: '__expr__'
              expression: 'diff'
              intervalMs: 1000
              maxDataPoints: 43200
              refId: 'condition'
              type: 'threshold'
        noDataState: 'OK'
        execErrState: 'Alerting'
        # The condition must hold for this long before the alert fires
        for: '5m'
        annotations:
          summary: 'Subgraph deployment {{ index $labels "deployment" }} is falling behind head by {{ index $values "diff" }}'
        labels: {}
        isPaused: false
|
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash

# Injects the configured subgraph ids into the Grafana subgraph alert rules.
#
# Env:
#   CERC_GRAFANA_ALERTS_SUBGRAPH_IDS  subgraph ids to alert on (separated by |)
#
# Exits non-zero if the alert rules file is missing or cannot be edited.
# Note: Requires the grafana container to be run with user root

ALERT_RULES_FILE=/etc/grafana/provisioning/alerting/subgraph-alert-rules.yml

echo "Using CERC_GRAFANA_ALERTS_SUBGRAPH_IDS ${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS}"

if [ ! -f "${ALERT_RULES_FILE}" ]; then
  echo "Subgraph alert rules file not found: ${ALERT_RULES_FILE}" >&2
  exit 1
fi

# Escape the characters that are special in a sed replacement (\, / and &)
# so that the ids are inserted literally
escaped_ids=$(printf '%s' "${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS}" | sed -e 's|[\\/&]|\\&|g')

# Replace subgraph ids in subgraph alerting config
# NOTE(review): the edit is done in place, so once the placeholder has been
# replaced a later change of the env value will not be picked up from the same
# file - confirm whether the rules file needs to be restored between runs
sed -i "s/REPLACE_WITH_SUBGRAPH_IDS/${escaped_ids}/g" "${ALERT_RULES_FILE}"
|
@ -113,21 +113,31 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
|
|||||||
labels:
|
labels:
|
||||||
instance: 'ajna'
|
instance: 'ajna'
|
||||||
chain: 'filecoin'
|
chain: 'filecoin'
|
||||||
|
|
||||||
|
- job_name: graph-node
|
||||||
|
metrics_path: /metrics
|
||||||
|
scrape_interval: 30s
|
||||||
|
static_configs:
|
||||||
|
- targets: ['GRAPH_NODE_HOST:GRAPH_NODE_HOST_METRICS_PORT']
|
||||||
```
|
```
|
||||||
|
|
||||||
Add scrape config as done above for any additional watcher to add it to the Watchers dashboard.
|
Add scrape config as done above for any additional watcher to add it to the Watchers dashboard.
|
||||||
|
|
||||||
### Grafana alerts config
|
### Grafana alerts config
|
||||||
|
|
||||||
Place the pre-configured watcher alerts rules in Grafana provisioning directory:
|
Place the pre-configured alert rules in the Grafana provisioning directory:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# watcher alert rules
|
||||||
cp monitoring-watchers-deployment/config/monitoring/watcher-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/
|
cp monitoring-watchers-deployment/config/monitoring/watcher-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/
|
||||||
|
|
||||||
|
# subgraph alert rules
|
||||||
|
cp monitoring-watchers-deployment/config/monitoring/subgraph-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/
|
||||||
```
|
```
|
||||||
|
|
||||||
Update the alerting contact points config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with desired contact points
|
Update the alerting contact points config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with desired contact points
|
||||||
|
|
||||||
Add corresponding routes to the notification policies config (`monitoring-watchers-deployment/monitoring/grafana/provisioning/alerting/policies.yaml`) with appropriate object-matchers:
|
Add corresponding routes to the notification policies config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/policies.yml`) with appropriate object-matchers:
|
||||||
|
|
||||||
```yml
|
```yml
|
||||||
...
|
...
|
||||||
@ -135,7 +145,7 @@ Add corresponding routes to the notification policies config (`monitoring-watche
|
|||||||
- receiver: SlackNotifier
|
- receiver: SlackNotifier
|
||||||
object_matchers:
|
object_matchers:
|
||||||
# Add matchers below
|
# Add matchers below
|
||||||
- ['grafana_folder', '=', 'WatcherAlerts']
|
- ['grafana_folder', '=~', 'WatcherAlerts|SubgraphAlerts']
|
||||||
```
|
```
|
||||||
|
|
||||||
### Env
|
### Env
|
||||||
@ -149,6 +159,9 @@ Set the following env variables in the deployment env config file (`monitoring-w
|
|||||||
# Grafana server host URL to be used
|
# Grafana server host URL to be used
|
||||||
# (Optional, default: http://localhost:3000)
|
# (Optional, default: http://localhost:3000)
|
||||||
GF_SERVER_ROOT_URL=
|
GF_SERVER_ROOT_URL=
|
||||||
|
|
||||||
|
# List of subgraph ids to configure alerts for (separated by |; the value is used as a regex match on the deployment label)
|
||||||
|
CERC_GRAFANA_ALERTS_SUBGRAPH_IDS=
|
||||||
```
|
```
|
||||||
|
|
||||||
## Start the stack
|
## Start the stack
|
||||||
|
Loading…
Reference in New Issue
Block a user