Add alerts on blackbox metrics for monitoring endpoints
All checks were successful
Lint Checks / Run linter (pull_request) Successful in 44s
Webapp Test / Run webapp test suite (pull_request) Successful in 4m33s
Deploy Test / Run deploy test suite (pull_request) Successful in 5m32s
K8s Deploy Test / Run deploy test suite on kind/k8s (pull_request) Successful in 9m30s
Smoke Test / Run basic test suite (pull_request) Successful in 4m52s
All checks were successful
Lint Checks / Run linter (pull_request) Successful in 44s
Webapp Test / Run webapp test suite (pull_request) Successful in 4m33s
Deploy Test / Run deploy test suite (pull_request) Successful in 5m32s
K8s Deploy Test / Run deploy test suite on kind/k8s (pull_request) Successful in 9m30s
Smoke Test / Run basic test suite (pull_request) Successful in 4m52s
This commit is contained in:
parent
345d200873
commit
1746f7366c
@ -0,0 +1,121 @@
|
|||||||
|
# Grafana alert-rule provisioning file.
# Defines one rule group ("blackbox") with one rule per monitored endpoint.
# Each rule reads that endpoint's `probe_success` series and alerts when the
# value is anything other than 1.
apiVersion: 1

groups:
  - orgId: 1
    name: blackbox
    folder: BlackboxAlerts
    # How often every rule in this group is evaluated.
    interval: 30s
    rules:
      # Azimuth Gateway endpoint
      - uid: azimuth_gateway
        title: azimuth_gateway_endpoint_tracking
        # refId of the `data` entry whose result decides the alert state.
        condition: condition
        data:
          # Query step: latest probe_success sample for the endpoint.
          - refId: probe
            # Look-back window in seconds, relative to evaluation time.
            relativeTimeRange:
              from: 600
              to: 0
            # NOTE(review): must match the UID of the metrics datasource
            # provisioned alongside this file - presumably Prometheus; confirm.
            datasourceUid: PBFA97CFB590B2093
            model:
              editorMode: code
              # <AZIMUTH_GATEWAY_GQL_ENDPOINT> is a placeholder; presumably it
              # is replaced with the real probe target before deployment.
              expr: 'probe_success{instance="<AZIMUTH_GATEWAY_GQL_ENDPOINT>"}'
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: probe
          # Expression step: non-zero (alerting) when the probe result is not 1.
          - refId: condition
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              # NOTE(review): classic-condition settings. The expression `type`
              # below is `math`, so these look like export leftovers - confirm
              # before removing.
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: '${probe} != 1'
              intervalMs: 1000
              maxDataPoints: 43200
              refId: condition
              type: math
        # Missing data and query errors are both treated as alerting.
        noDataState: Alerting
        execErrState: Alerting
        # The condition must hold this long before the alert fires.
        for: 5m
        annotations:
          summary: 'Probe failed for Azimuth gateway endpoint {{ index $labels "instance" }}'
        labels:
          endpoint: '{{ index $labels "instance" }}'
          # NOTE(review): this label is derived from a query value, so the
          # label set changes whenever the value changes. Consider moving it
          # to an annotation if no notification routing depends on it.
          probe_success: '{{ index $values "probe" }}'
        isPaused: false

      # Laconicd GQL endpoint
      - uid: laconicd_gql
        title: laconicd_gql_endpoint_tracking
        # refId of the `data` entry whose result decides the alert state.
        condition: condition
        data:
          # Query step: latest probe_success sample for the endpoint.
          - refId: probe
            # Look-back window in seconds, relative to evaluation time.
            relativeTimeRange:
              from: 600
              to: 0
            # NOTE(review): must match the UID of the metrics datasource
            # provisioned alongside this file - presumably Prometheus; confirm.
            datasourceUid: PBFA97CFB590B2093
            model:
              editorMode: code
              # <LACONICD_GQL_ENDPOINT> is a placeholder; presumably it is
              # replaced with the real probe target before deployment.
              expr: 'probe_success{instance="<LACONICD_GQL_ENDPOINT>"}'
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: probe
          # Expression step: non-zero (alerting) when the probe result is not 1.
          - refId: condition
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              # NOTE(review): classic-condition settings. The expression `type`
              # below is `math`, so these look like export leftovers - confirm
              # before removing.
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: '${probe} != 1'
              intervalMs: 1000
              maxDataPoints: 43200
              refId: condition
              type: math
        # Missing data and query errors are both treated as alerting.
        noDataState: Alerting
        execErrState: Alerting
        # The condition must hold this long before the alert fires.
        for: 5m
        annotations:
          summary: 'Probe failed for Laconicd GQL endpoint {{ index $labels "instance" }}'
        labels:
          endpoint: '{{ index $labels "instance" }}'
          # NOTE(review): this label is derived from a query value, so the
          # label set changes whenever the value changes. Consider moving it
          # to an annotation if no notification routing depends on it.
          probe_success: '{{ index $values "probe" }}'
        isPaused: false
|
@ -49,7 +49,7 @@
|
|||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 3,
|
"h": 3,
|
||||||
"w": 3,
|
"w": 4,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 0
|
"y": 0
|
||||||
},
|
},
|
||||||
|
@ -123,8 +123,9 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-spec.yml --de
|
|||||||
```yml
|
```yml
|
||||||
...
|
...
|
||||||
- job_name: laconicd
|
- job_name: laconicd
|
||||||
static_configs:
|
...
|
||||||
- targets: ['example-host:1317']
|
static_configs:
|
||||||
|
- targets: ['example-host:1317']
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -49,8 +49,9 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
|
|||||||
- <LACONICD_GQL_ENDPOINT>
|
- <LACONICD_GQL_ENDPOINT>
|
||||||
...
|
...
|
||||||
- job_name: laconicd
|
- job_name: laconicd
|
||||||
static_configs:
|
...
|
||||||
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
|
static_configs:
|
||||||
|
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
|
||||||
...
|
...
|
||||||
- job_name: azimuth
|
- job_name: azimuth
|
||||||
scrape_interval: 10s
|
scrape_interval: 10s
|
||||||
|
Loading…
Reference in New Issue
Block a user