Setup basic alerting for watchers in monitoring stack (#698)
* Provision Grafana alert contactpoints and policies for Slack * Add watcher alert rules * Update watcher monitoring instructions * Add listening port flag to node exporter command * Add reference links
This commit is contained in:
parent
eae2af7ccc
commit
a3eb3c0bb0
@ -5,12 +5,11 @@ services:
|
||||
image: prom/node-exporter:latest
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- '--web.listen-address=:9100'
|
||||
- '--path.rootfs=/host'
|
||||
- '--collector.systemd'
|
||||
- '--collector.processes'
|
||||
network_mode: host
|
||||
pid: host
|
||||
ports:
|
||||
- 9100
|
||||
volumes:
|
||||
- '/:/host:ro,rslave'
|
||||
|
@ -0,0 +1,14 @@
|
||||
# https://www.clever-cloud.com/blog/features/2021/12/03/slack-alerts-for-grafana/
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
contactPoints:
|
||||
- orgId: 1
|
||||
name: SlackNotifier
|
||||
receivers:
|
||||
- uid: a71b06e3-58b6-41fe-af65-fbbb29653951
|
||||
type: slack
|
||||
settings:
|
||||
# Slack hook URL (see https://api.slack.com/messaging/webhooks)
|
||||
url: <YOUR_SLACK_HOOK_URL>
|
||||
disableResolveMessage: false
|
@ -0,0 +1,15 @@
|
||||
# https://grafana.com/docs/grafana/latest/alerting/alerting-rules/create-notification-policy/
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
policies:
|
||||
- orgId: 1
|
||||
receiver: grafana-default-email
|
||||
group_by:
|
||||
- grafana_folder
|
||||
- alertname
|
||||
routes:
|
||||
- receiver: SlackNotifier
|
||||
object_matchers:
|
||||
# Add matchers below
|
||||
# - ['grafana_folder', '=', 'MyAlerts']
|
@ -0,0 +1,973 @@
|
||||
# https://grafana.com/docs/grafana/latest/alerting/alerting-rules/create-grafana-managed-rule/
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
groups:
|
||||
- orgId: 1
|
||||
name: watcher
|
||||
folder: WatcherAlerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# Azimuth
|
||||
- uid: azimuth_diff_external
|
||||
title: azimuth_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="azimuth", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="azimuth", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: censures_diff_external
|
||||
title: censures_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="censures", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="censures", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: claims_diff_external
|
||||
title: claims_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="claims", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="claims", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: conditional_star_release_diff_external
|
||||
title: conditional_star_release_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="conditional_star_release", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="conditional_star_release", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: delegated_sending_diff_external
|
||||
title: delegated_sending_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="delegated_sending", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="delegated_sending", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: ecliptic_diff_external
|
||||
title: ecliptic_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="ecliptic", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="ecliptic", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: linear_star_release_diff_external
|
||||
title: linear_star_release_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="linear_star_release", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="azimuth", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: polls_diff_external
|
||||
title: polls_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="polls", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="ethereum"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="azimuth", instance="polls", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
|
||||
# Sushi
|
||||
- uid: sushiswap_diff_external
|
||||
title: sushiswap_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="sushi", instance="sushiswap", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="filecoin"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="sushi", instance="sushiswap", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
||||
- uid: merkl_sushiswap_diff_external
|
||||
title: merkl_sushiswap_watcher_head_tracking
|
||||
condition: condition
|
||||
data:
|
||||
- refId: diff
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
disableTextWrap: false
|
||||
editorMode: code
|
||||
expr: latest_block_number - on(chain) group_right sync_status_block_number{job="sushi", instance="merkl_sushiswap", kind="latest_indexed"}
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: diff
|
||||
useBackend: false
|
||||
- refId: latest_external
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: latest_block_number{chain="filecoin"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_external
|
||||
- refId: latest_indexed
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: PBFA97CFB590B2093
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFA97CFB590B2093
|
||||
editorMode: code
|
||||
expr: sync_status_block_number{job="sushi", instance="merkl_sushiswap", kind="latest_indexed"}
|
||||
hide: false
|
||||
instant: true
|
||||
legendFormat: __auto
|
||||
range: false
|
||||
refId: latest_indexed
|
||||
- refId: condition
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: ${diff} >= 16
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: condition
|
||||
type: math
|
||||
dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
panelId: 24
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 15m
|
||||
annotations:
|
||||
__dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca
|
||||
__panelId__: "24"
|
||||
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
|
||||
isPaused: false
|
@ -35,6 +35,8 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-watchers-spec
|
||||
|
||||
## Configure
|
||||
|
||||
### Prometheus scrape config
|
||||
|
||||
Add the following scrape configs to prometheus config file (`monitoring-watchers-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder:
|
||||
|
||||
```yml
|
||||
@ -100,6 +102,27 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
|
||||
|
||||
Add scrape config as done above for any additional watcher to add it to the Watchers dashboard.
|
||||
|
||||
### Grafana alerts config
|
||||
|
||||
Place the pre-configured watcher alerts rules in Grafana provisioning directory:
|
||||
|
||||
```bash
|
||||
cp monitoring-watchers-deployment/config/monitoring/watcher-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/
|
||||
```
|
||||
|
||||
Update the alerting contact points config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with desired contact points
|
||||
|
||||
Add corresponding routes to the notification policies config (`monitoring-watchers-deployment/monitoring/grafana/provisioning/alerting/policies.yaml`) with appropriate object-matchers:
|
||||
|
||||
```yml
|
||||
...
|
||||
routes:
|
||||
- receiver: SlackNotifier
|
||||
object_matchers:
|
||||
# Add matchers below
|
||||
- ['grafana_folder', '=', 'WatcherAlerts']
|
||||
```
|
||||
|
||||
### Env
|
||||
|
||||
Set the following env variables in the deployment env config file (`monitoring-watchers-deployment/config.env`):
|
||||
|
Loading…
Reference in New Issue
Block a user