Add alerts on blackbox metrics for monitoring endpoints

This commit is contained in:
Prathamesh Musale 2024-04-15 17:02:50 +05:30
parent 345d200873
commit 1746f7366c
4 changed files with 128 additions and 5 deletions

View File

@ -0,0 +1,121 @@
# Grafana alert-rule provisioning: one rule per probed endpoint, each built on
# a `probe_success` query.
# The <..._ENDPOINT> placeholders are presumably substituted at deploy time and
# must match the `instance` label of the probed target - TODO confirm.
apiVersion: 1
groups:
  - orgId: 1
    name: blackbox
    folder: BlackboxAlerts
    # Evaluation interval shared by every rule in this group.
    interval: 30s
    rules:
      # Azimuth Gateway endpoint
      - uid: azimuth_gateway
        title: azimuth_gateway_endpoint_tracking
        # refId of the data entry whose result decides the alert state.
        condition: condition
        data:
          # Instant query for the probe result of this endpoint.
          - refId: probe
            relativeTimeRange:
              from: 600
              to: 0
            # NOTE(review): presumably the uid of the Prometheus datasource
            # that scrapes the blackbox exporter - confirm.
            datasourceUid: PBFA97CFB590B2093
            model:
              editorMode: code
              expr: probe_success{instance="<AZIMUTH_GATEWAY_GQL_ENDPOINT>"}
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: probe
          # Math expression over the `probe` result; non-zero whenever the
          # probe value is anything other than 1.
          - refId: condition
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              # NOTE(review): `conditions` looks like exported classic-condition
              # defaults; with `type: math` the `expression` below is what is
              # evaluated - confirm before removing.
              conditions:
                - evaluator:
                    params: [0, 0]
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: ${probe} != 1
              intervalMs: 1000
              maxDataPoints: 43200
              refId: condition
              type: math
        # Missing data and query errors are both treated as alerting.
        noDataState: Alerting
        execErrState: Alerting
        # The condition has to hold for 5 minutes before the alert fires.
        for: 5m
        annotations:
          summary: 'Probe failed for Azimuth gateway endpoint {{ index $labels "instance" }}'
        labels:
          endpoint: '{{ index $labels "instance" }}'
          probe_success: '{{ index $values "probe" }}'
        isPaused: false
      # Laconicd GQL endpoint
      - uid: laconicd_gql
        title: laconicd_gql_endpoint_tracking
        # refId of the data entry whose result decides the alert state.
        condition: condition
        data:
          # Instant query for the probe result of this endpoint.
          - refId: probe
            relativeTimeRange:
              from: 600
              to: 0
            # NOTE(review): presumably the uid of the Prometheus datasource
            # that scrapes the blackbox exporter - confirm.
            datasourceUid: PBFA97CFB590B2093
            model:
              editorMode: code
              expr: probe_success{instance="<LACONICD_GQL_ENDPOINT>"}
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: probe
          # Math expression over the `probe` result; non-zero whenever the
          # probe value is anything other than 1.
          - refId: condition
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              # NOTE(review): `conditions` looks like exported classic-condition
              # defaults; with `type: math` the `expression` below is what is
              # evaluated - confirm before removing.
              conditions:
                - evaluator:
                    params: [0, 0]
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: ${probe} != 1
              intervalMs: 1000
              maxDataPoints: 43200
              refId: condition
              type: math
        # Missing data and query errors are both treated as alerting.
        noDataState: Alerting
        execErrState: Alerting
        # The condition has to hold for 5 minutes before the alert fires.
        for: 5m
        annotations:
          summary: 'Probe failed for Laconicd GQL endpoint {{ index $labels "instance" }}'
        labels:
          endpoint: '{{ index $labels "instance" }}'
          probe_success: '{{ index $values "probe" }}'
        isPaused: false

View File

@ -49,7 +49,7 @@
}, },
"gridPos": { "gridPos": {
"h": 3, "h": 3,
"w": 3, "w": 4,
"x": 0, "x": 0,
"y": 0 "y": 0
}, },

View File

@ -123,8 +123,9 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-spec.yml --de
```yml ```yml
... ...
- job_name: laconicd - job_name: laconicd
static_configs: ...
- targets: ['example-host:1317'] static_configs:
- targets: ['example-host:1317']
... ...
``` ```

View File

@ -49,8 +49,9 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
- <LACONICD_GQL_ENDPOINT> - <LACONICD_GQL_ENDPOINT>
... ...
- job_name: laconicd - job_name: laconicd
static_configs: ...
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT'] static_configs:
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
... ...
- job_name: azimuth - job_name: azimuth
scrape_interval: 10s scrape_interval: 10s