[WIP] Add stack for monitoring testnet services #46

Closed
nabarun wants to merge 1 commits from ng-monitoring into main
14 changed files with 27249 additions and 0 deletions

View File

@ -0,0 +1,31 @@
version: "3.7"
services:
grafana:
image: grafana/grafana:10.2.3
restart: always
environment:
GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL}
CERC_GRAFANA_ALERTS_SUBGRAPH_IDS: ${CERC_GRAFANA_ALERTS_SUBGRAPH_IDS}
volumes:
- ../config/monitoring/grafana/provisioning:/etc/grafana/provisioning
- ../config/monitoring/grafana/dashboards:/etc/grafana/dashboards
- ../config/monitoring/update-grafana-alerts-config.sh:/update-grafana-alerts-config.sh
- grafana_storage:/var/lib/grafana
user: root
entrypoint: ["bash", "-c"]
command: |
"/run.sh"
ports:
- "3000"
extra_hosts:
- "host.docker.internal:host-gateway"
healthcheck:
test: ["CMD", "nc", "-vz", "localhost", "3000"]
interval: 30s
timeout: 5s
retries: 10
start_period: 3s
volumes:
grafana_storage:

View File

@ -0,0 +1,42 @@
version: "3.7"
services:
prometheus:
image: prom/prometheus:v2.49.1
restart: always
volumes:
- ../config/monitoring/prometheus:/etc/prometheus
- prometheus_data:/prometheus
ports:
- "9090"
healthcheck:
test: ["CMD", "nc", "-vz", "localhost", "9090"]
interval: 30s
timeout: 5s
retries: 10
start_period: 3s
extra_hosts:
- "host.docker.internal:host-gateway"
blackbox:
image: prom/blackbox-exporter:latest
restart: always
volumes:
- ../config/monitoring/blackbox.yml:/etc/blackbox_exporter/config.yml
ports:
- '9115'
extra_hosts:
- "host.docker.internal:host-gateway"
postgres-exporter:
image: quay.io/prometheuscommunity/postgres-exporter
restart: always
volumes:
- ../config/monitoring/postgres-exporter.yml:/postgres_exporter.yml
ports:
- '9187'
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
prometheus_data:

View File

@ -0,0 +1,7 @@
modules:
http_2xx:
prober: http
timeout: 5s
http:
valid_status_codes: [] #default to 2xx
method: GET

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,943 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "node.js prometheus client basic metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 11159,
"graphTooltip": 0,
"id": 15,
"links": [],
"liveNow": false,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 10,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 6,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "irate(process_cpu_user_seconds_total{instance=~\"$instance\"}[2m]) * 100",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "User CPU - {{instance}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "irate(process_cpu_system_seconds_total{instance=~\"$instance\"}[2m]) * 100",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Sys CPU - {{instance}}",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Process CPU Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 9,
"x": 10,
"y": 0
},
"hiddenSeries": false,
"id": 8,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_eventloop_lag_seconds{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Event Loop Lag",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "text",
"mode": "fixed"
},
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 5,
"x": 19,
"y": 0
},
"id": 2,
"interval": "",
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"textMode": "name",
"wideLayout": true
},
"pluginVersion": "10.2.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(nodejs_version_info{instance=~\"$instance\"}) by (version)",
"format": "time_series",
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{version}}",
"refId": "A"
}
],
"title": "Node.js Version",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#F2495C",
"mode": "fixed"
},
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 5,
"x": 19,
"y": 3
},
"id": 4,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.2.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "sum(changes(process_start_time_seconds{instance=~\"$instance\"}[1m]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Process Restart Times",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 16,
"x": 0,
"y": 7
},
"hiddenSeries": false,
"id": 7,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "process_resident_memory_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Process Memory - {{instance}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_heap_size_total_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Heap Total - {{instance}}",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_heap_size_used_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Heap Used - {{instance}}",
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_external_memory_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "External Memory - {{instance}}",
"refId": "D"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Process Memory Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 7
},
"hiddenSeries": false,
"id": 9,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_active_handles_total{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Active Handler - {{instance}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_active_requests_total{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Active Request - {{instance}}",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Active Handlers/Requests Total",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 14
},
"hiddenSeries": false,
"id": 10,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_heap_space_size_total_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Heap Total - {{instance}} - {{space}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Heap Total Detail",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 14
},
"hiddenSeries": false,
"id": 11,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_heap_space_size_used_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Heap Used - {{instance}} - {{space}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Heap Used Detail",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 14
},
"hiddenSeries": false,
"id": 12,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "nodejs_heap_space_size_available_bytes{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Heap Used - {{instance}} - {{space}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Heap Available Detail",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"refresh": "10s",
"schemaVersion": 38,
"tags": [
"nodejs"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "label_values(nodejs_version_info, instance)",
"hide": 0,
"includeAll": true,
"label": "instance",
"multi": true,
"name": "instance",
"options": [],
"query": "label_values(nodejs_version_info, instance)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "NodeJS Application Dashboard",
"uid": "PTSqcpJWk",
"version": 3,
"weekStart": ""
}

View File

@ -0,0 +1,14 @@
# https://www.clever-cloud.com/blog/features/2021/12/03/slack-alerts-for-grafana/
apiVersion: 1
contactPoints:
- orgId: 1
name: SlackNotifier
receivers:
- uid: a71b06e3-58b6-41fe-af65-fbbb29653951
type: slack
settings:
# Slack hook URL (see https://api.slack.com/messaging/webhooks)
url: <YOUR_SLACK_HOOK_URL>
disableResolveMessage: false

View File

@ -0,0 +1,15 @@
# https://grafana.com/docs/grafana/latest/alerting/alerting-rules/create-notification-policy/
apiVersion: 1
policies:
- orgId: 1
receiver: grafana-default-email
group_by:
- grafana_folder
- alertname
routes:
- receiver: SlackNotifier
object_matchers:
# Add matchers below
# - ['grafana_folder', '=', 'MyAlerts']

View File

@ -0,0 +1,10 @@
apiVersion: 1
providers:
- name: dashboards
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/dashboards
foldersFromFilesStructure: true

View File

@ -0,0 +1,16 @@
apiVersion: 1
datasources:
- id: 1
orgId: 1
name: Prometheus
type: prometheus
typeName: Prometheus
typeLogoUrl: public/app/plugins/datasource/prometheus/img/prometheus_logo.svg
access: proxy
url: http://prometheus:9090
isDefault: true
jsonData:
httpMethod: POST
version: 1
editable: true

View File

@ -0,0 +1,69 @@
global:
scrape_interval: 10s
evaluation_interval: 15s
rule_files:
# - "first.rules"
# - "second.rules"
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:9090']
- job_name: node
static_configs:
# Add node-exporter targets to be monitored below
# - targets: ['example-host:9100']
# labels:
# instance: 'my-host'
- job_name: 'blackbox'
scrape_interval: 10s
metrics_path: /probe
params:
module: [http_2xx]
static_configs:
# Add URLs to be monitored below
- targets:
# - https://github.com
relabel_configs:
- source_labels: [__address__]
regex: (.*)(:80)?
target_label: __param_target
- source_labels: [__param_target]
regex: (.*)
target_label: instance
replacement: ${1}
- source_labels: []
regex: .*
target_label: __address__
replacement: blackbox:9115
- job_name: 'postgres'
scrape_interval: 30s
scrape_timeout: 30s
static_configs:
# Add DB targets below
# - targets: [example-server:5432]
# labels:
# instance: 'example-label'
metrics_path: /probe
params:
auth_module: [foo]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: postgres-exporter:9187
- job_name: laconicd
metrics_path: /metrics
scrape_interval: 30s
static_configs:
# Add laconicd REST endpoint target with host and port (1317)
# - targets: ['example-host:1317']
params:
format: ['prometheus']

View File

@ -0,0 +1,122 @@
# Monitoring Testnet services
Instructions to setup and run monitoring stack
## Create a deployment
After completing [setup](./README.md#setup), create a spec file for the deployment, which will map the stack's ports and volumes to the host:
```bash
laconic-so --stack monitoring deploy init --output monitoring-testnet-spec.yml
```
### Ports
Edit `network` in spec file to map container ports to same ports in host:
```
...
network:
ports:
prometheus:
- '9090:9090'
grafana:
- '3000:3000'
...
```
---
Once you've made any needed changes to the spec file, create a deployment from it:
```bash
laconic-so --stack monitoring deploy create --spec-file monitoring-testnet-spec.yml --deployment-dir monitoring-testnet-deployment
```
## Configure
### Prometheus scrape config
Add the following scrape configs to prometheus config file (`monitoring-testnet-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder:
```yml
...
- job_name: 'blackbox'
...
static_configs:
- targets:
- <LACONICD_GQL_ENDPOINT>
...
- job_name: laconicd
static_configs:
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
...
```
### Grafana alerts config
Place the pre-configured alerts rules in Grafana provisioning directory:
```bash
cp monitoring-testnet-deployment/config/monitoring/testnet-alert-rules.yml monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/
```
Update the alerting contact points config (`monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with desired contact points
Add corresponding routes to the notification policies config (`monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/policies.yml`) with appropriate object-matchers:
```yml
...
routes:
- receiver: SlackNotifier
object_matchers:
# Add matchers below
- ['grafana_folder', '=~', 'TestnetAlerts']
```
### Env
Set the following env variables in the deployment env config file (`monitoring-testnet-deployment/config.env`):
```bash
# Infura key to be used
CERC_INFURA_KEY=
# Grafana server host URL to be used
# (Optional, default: http://localhost:3000)
GF_SERVER_ROOT_URL=
```
## Start the stack
Start the deployment:
```bash
laconic-so deployment --dir monitoring-testnet-deployment start
```
* List and check the health status of all the containers using `docker ps` and wait for them to be `healthy`
* Grafana should now be visible at http://localhost:3000 with configured dashboards
## Clean up
To stop monitoring services running in the background, while preserving data:
```bash
# Only stop the docker containers
laconic-so deployment --dir monitoring-testnet-deployment stop
# Run 'start' to restart the deployment
```
To stop monitoring services and also delete data:
```bash
# Stop the docker containers
laconic-so deployment --dir monitoring-testnet-deployment stop --delete-volumes
# Remove deployment directory (deployment will have to be recreated for a re-run)
rm -rf monitoring-testnet-deployment
```

View File

@ -0,0 +1,5 @@
version: "0.1"
name: monitoring testnet
pods:
- prom-server
- grafana