diff --git a/stack_orchestrator/data/compose/docker-compose-prom-server.yml b/stack_orchestrator/data/compose/docker-compose-prom-server.yml index 26a61e88..e3faecc8 100644 --- a/stack_orchestrator/data/compose/docker-compose-prom-server.yml +++ b/stack_orchestrator/data/compose/docker-compose-prom-server.yml @@ -18,5 +18,15 @@ services: extra_hosts: - "host.docker.internal:host-gateway" + blackbox: + image: prom/blackbox-exporter:latest + restart: always + volumes: + - ../config/monitoring/blackbox.yml:/etc/blackbox_exporter/config.yml + ports: + - '9115' + extra_hosts: + - "host.docker.internal:host-gateway" + volumes: prometheus_data: diff --git a/stack_orchestrator/data/config/monitoring/blackbox.yml b/stack_orchestrator/data/config/monitoring/blackbox.yml new file mode 100644 index 00000000..4a9d72eb --- /dev/null +++ b/stack_orchestrator/data/config/monitoring/blackbox.yml @@ -0,0 +1,7 @@ +modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_status_codes: [] #default to 2xx + method: GET diff --git a/stack_orchestrator/data/config/monitoring/grafana/dashboards/prometheus-blackbox-exporter.json b/stack_orchestrator/data/config/monitoring/grafana/dashboards/prometheus-blackbox-exporter.json new file mode 100644 index 00000000..638c4d5b --- /dev/null +++ b/stack_orchestrator/data/config/monitoring/grafana/dashboards/prometheus-blackbox-exporter.json @@ -0,0 +1,1097 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Prometheus Blackbox Exporter Overview", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 7587, + "graphTooltip": 0, + "id": 29, + "links": [], + "liveNow": false, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 138, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_duration_seconds{instance=~\"$target\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Global Probe Duration", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 15, + "panels": [], + "repeat": "target", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], + "title": "$target status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "DOWN" + }, + "1": { + "text": "UP" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#299c46", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 2, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_success{instance=~\"$target\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Status", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 4, + "y": 9 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_http_duration_seconds{instance=~\"$target\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "{{ phase }}", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "HTTP Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 9 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_duration_seconds{instance=~\"$target\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "seconds", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Probe Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "0": { + "text": "N/A" + }, + "1": { + "text": "YES" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 201 + }, + { + "color": "#d44a3a", + "value": 399 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 11 + }, + "id": 20, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_http_status_code{instance=~\"$target\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "HTTP Status Code", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 13 + }, + "id": 27, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_http_version{instance=~\"$target\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "HTTP Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "NO" + }, + "1": { + "text": "YES" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 15 + }, + "id": 18, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_http_ssl{instance=~\"$target\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "SSL", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [ + { + "options": { + "0": { + "text": "NO" + }, + "1": { + "text": "YES" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 1209600 + } + ] + }, + "unit": "dtdurations" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 10, + "x": 4, + "y": 15 + }, + "id": 19, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "probe_ssl_earliest_cert_expiry{instance=~\"$target\"} - time()", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "SSL Expiry", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 5, + "x": 14, + "y": 15 + }, + "id": 23, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "avg(probe_duration_seconds{instance=~\"$target\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Average Probe Duration", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 5, + "x": 19, + "y": 15 + }, + "id": 24, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "avg(probe_dns_lookup_time_seconds{instance=~\"$target\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Average DNS Lookup", + "type": "stat" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "tags": [ + "blackbox", + "prometheus" + ], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 10, + "auto_min": "10s", + "current": { + "selected": false, + "text": "10s", + "value": "10s" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": true, + "text": "10s", + "value": "10s" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "target", + "options": [], + "query": "label_values(probe_success, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Prometheus Blackbox Exporter", + "uid": "xtkCtBkiz", + "version": 2, + "weekStart": "" +} diff --git a/stack_orchestrator/data/config/monitoring/prometheus/prometheus.yml b/stack_orchestrator/data/config/monitoring/prometheus/prometheus.yml index e8e7b8b8..6b2e380e 100644 --- a/stack_orchestrator/data/config/monitoring/prometheus/prometheus.yml +++ b/stack_orchestrator/data/config/monitoring/prometheus/prometheus.yml @@ -10,3 +10,24 @@ scrape_configs: - job_name: prometheus static_configs: - targets: ['localhost:9090'] + + - job_name: 'blackbox' + metrics_path: /probe + params: + module: [http_2xx] + static_configs: + # Add URLs to be monitored below + - targets: + # - https://github.com + relabel_configs: + - source_labels: [__address__] + regex: (.*)(:80)? + target_label: __param_target + - source_labels: [__param_target] + regex: (.*) + target_label: instance + replacement: ${1} + - source_labels: [] + regex: .* + target_label: __address__ + replacement: blackbox:9115 diff --git a/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md b/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md index f49bdcda..711b3c37 100644 --- a/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md +++ b/stack_orchestrator/data/stacks/monitoring/monitoring-watchers.md @@ -40,6 +40,11 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-watchers-spec Add the following scrape configs to prometheus config file (`monitoring-watchers-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder: ```yml + - job_name: 'blackbox' + ... + static_configs: + - targets: + - ... - job_name: azimuth metrics_path: /metrics