From e0c3789f743152678ebc0403e4059fd7511bf9f0 Mon Sep 17 00:00:00 2001 From: Prathamesh Musale Date: Wed, 27 Dec 2023 10:03:46 +0530 Subject: [PATCH 1/5] Provision Grafana alert contactpoints and policies for Slack --- .../grafana/provisioning/alerting/contactpoints.yml | 12 ++++++++++++ .../grafana/provisioning/alerting/policies.yaml | 13 +++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml create mode 100644 stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml diff --git a/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml new file mode 100644 index 00000000..ee381682 --- /dev/null +++ b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +contactPoints: + - orgId: 1 + name: SlackNotifier + receivers: + - uid: a71b06e3-58b6-41fe-af65-fbbb29653951 + type: slack + settings: + # Slack hook URL + url: + disableResolveMessage: false diff --git a/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml new file mode 100644 index 00000000..1b4e2ddf --- /dev/null +++ b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml @@ -0,0 +1,13 @@ +apiVersion: 1 + +policies: + - orgId: 1 + receiver: grafana-default-email + group_by: + - grafana_folder + - alertname + routes: + - receiver: SlackNotifier + object_matchers: + # Add matchers below + # - ['grafana_folder', '=', 'MyAlerts'] -- 2.45.2 From 3b5a36348399189f188c4a3d447ac75b7452e76d Mon Sep 17 00:00:00 2001 From: Prathamesh Musale Date: Wed, 27 Dec 2023 11:53:07 +0530 Subject: [PATCH 2/5] Add watcher alert rules --- 
.../config/monitoring/watcher-alert-rules.yml | 971 ++++++++++++++++++ 1 file changed, 971 insertions(+) create mode 100644 stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml diff --git a/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml b/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml new file mode 100644 index 00000000..292ab4d9 --- /dev/null +++ b/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml @@ -0,0 +1,971 @@ +apiVersion: 1 + +groups: + - orgId: 1 + name: watcher + folder: WatcherAlerts + interval: 30s + rules: + # Azimuth + - uid: azimuth_diff_external + title: azimuth_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="azimuth", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="azimuth", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + 
datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: censures_diff_external + title: censures_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="censures", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="censures", kind="latest_indexed"} + hide: false + instant: 
true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: claims_diff_external + title: claims_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="claims", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + 
editorMode: code + expr: sync_status_block_number{job="azimuth", instance="claims", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: conditional_star_release_diff_external + title: conditional_star_release_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="conditional_star_release", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: 
latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="conditional_star_release", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: delegated_sending_diff_external + title: delegated_sending_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="delegated_sending", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: 
prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="delegated_sending", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: ecliptic_diff_external + title: ecliptic_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="ecliptic", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: 
false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="ecliptic", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: linear_star_release_diff_external + title: linear_star_release_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="linear_star_release", kind="latest_indexed"} + 
fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="linear_star_release", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: polls_diff_external + title: polls_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr:
latest_block_number - on(chain) group_right sync_status_block_number{job="azimuth", instance="polls", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="ethereum"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="azimuth", instance="polls", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + + # Sushi + - uid: sushiswap_diff_external + title: sushiswap_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + 
datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="sushi", instance="sushiswap", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="filecoin"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="sushi", instance="sushiswap", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false + - uid: 
merkl_sushiswap_diff_external + title: merkl_sushiswap_watcher_head_tracking + condition: condition + data: + - refId: diff + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + disableTextWrap: false + editorMode: code + expr: latest_block_number - on(chain) group_right sync_status_block_number{job="sushi", instance="merkl_sushiswap", kind="latest_indexed"} + fullMetaSearch: false + includeNullMetadata: true + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: diff + useBackend: false + - refId: latest_external + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: latest_block_number{chain="filecoin"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_external + - refId: latest_indexed + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: sync_status_block_number{job="sushi", instance="merkl_sushiswap", kind="latest_indexed"} + hide: false + instant: true + legendFormat: __auto + range: false + refId: latest_indexed + - refId: condition + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: [] + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: ${diff} >= 16 + intervalMs: 1000 + maxDataPoints: 43200 + refId: condition + type: math + dashboardUid: cb9b746a-9abc-482e-9214-5231e0dd75ca + panelId: 24 + noDataState: Alerting + execErrState: Alerting + for: 15m + annotations: + __dashboardUid__: cb9b746a-9abc-482e-9214-5231e0dd75ca + __panelId__: "24" + summary: Watcher 
{{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }} + isPaused: false -- 2.45.2 From ce7d91d1bb7f80a2a07322cad58962924720a5b4 Mon Sep 17 00:00:00 2001 From: Prathamesh Musale Date: Wed, 27 Dec 2023 12:03:59 +0530 Subject: [PATCH 3/5] Update watcher monitoring instructions --- .../stacks/monitoring/monitoring-watchers.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md b/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md index 23335efb..3673b530 100644 --- a/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md +++ b/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md @@ -35,6 +35,8 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-watchers-spec ## Configure +### Prometheus scrape config + Add the following scrape configs to prometheus config file (`monitoring-watchers-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder: ```yml @@ -100,6 +102,27 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers Add scrape config as done above for any additional watcher to add it to the Watchers dashboard. +### Grafana alerts config + +Place the pre-configured watcher alert rules in the Grafana provisioning directory: + + ```bash + cp monitoring-watchers-deployment/config/monitoring/watcher-alert-rules.yml monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/ + ``` + +Update the alerting contact points config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with the desired contact points. + +Add corresponding routes to the notification policies config (`monitoring-watchers-deployment/config/monitoring/grafana/provisioning/alerting/policies.yaml`) with appropriate object-matchers: + + ```yml + ...
+ routes: + - receiver: SlackNotifier + object_matchers: + # Add matchers below + - ['grafana_folder', '=', 'WatcherAlerts'] + ``` + ### Env Set the following env variables in the deployment env config file (`monitoring-watchers-deployment/config.env`): -- 2.45.2 From 231c4f94ec2d0f12128ca7ada5a81815db78e685 Mon Sep 17 00:00:00 2001 From: Prathamesh Musale Date: Wed, 27 Dec 2023 14:12:25 +0530 Subject: [PATCH 4/5] Add listening port flag to node exporter command --- .../data/compose/docker-compose-node-exporter.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stack_orchestrator/data/compose/docker-compose-node-exporter.yml b/stack_orchestrator/data/compose/docker-compose-node-exporter.yml index 8b49c269..6fa02d6d 100644 --- a/stack_orchestrator/data/compose/docker-compose-node-exporter.yml +++ b/stack_orchestrator/data/compose/docker-compose-node-exporter.yml @@ -5,12 +5,11 @@ services: image: prom/node-exporter:latest restart: unless-stopped command: + - '--web.listen-address=:9100' - '--path.rootfs=/host' - '--collector.systemd' - '--collector.processes' network_mode: host pid: host - ports: - - 9100 volumes: - '/:/host:ro,rslave' -- 2.45.2 From 20497bfaaf8f2c23f6ef8bda76607d1ab12672dc Mon Sep 17 00:00:00 2001 From: Prathamesh Musale Date: Mon, 8 Jan 2024 12:03:41 +0530 Subject: [PATCH 5/5] Add reference links --- .../grafana/provisioning/alerting/contactpoints.yml | 4 +++- .../provisioning/alerting/{policies.yaml => policies.yml} | 2 ++ .../data/config/monitoring/watcher-alert-rules.yml | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) rename stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/{policies.yaml => policies.yml} (74%) diff --git a/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml index ee381682..5e924946 100644 --- 
a/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml +++ b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/contactpoints.yml @@ -1,3 +1,5 @@ +# https://www.clever-cloud.com/blog/features/2021/12/03/slack-alerts-for-grafana/ + apiVersion: 1 contactPoints: @@ -7,6 +9,6 @@ contactPoints: - uid: a71b06e3-58b6-41fe-af65-fbbb29653951 type: slack settings: - # Slack hook URL + # Slack hook URL (see https://api.slack.com/messaging/webhooks) url: disableResolveMessage: false diff --git a/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yml similarity index 74% rename from stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml rename to stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yml index 1b4e2ddf..b1beb31d 100644 --- a/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yaml +++ b/stack_orchestrator/data/config/monitoring/grafana/provisioning/alerting/policies.yml @@ -1,3 +1,5 @@ +# https://grafana.com/docs/grafana/latest/alerting/alerting-rules/create-notification-policy/ + apiVersion: 1 policies: diff --git a/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml b/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml index 292ab4d9..7e26ba14 100644 --- a/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml +++ b/stack_orchestrator/data/config/monitoring/watcher-alert-rules.yml @@ -1,3 +1,5 @@ +# https://grafana.com/docs/grafana/latest/alerting/alerting-rules/create-grafana-managed-rule/ + apiVersion: 1 groups: -- 2.45.2