Update monitoring stack with additional dashboards and watcher metrics (#693)
* Include retry jobs and update default refresh intervals * Add prometheus blackbox exporter and its dashboard * Add NodeJS application dashboard * Allow UI updates * Update watcher dashboards for upstream and external chain heads * Update watcher dashboards with watcher config metrics * Upgrade sushiswap and azimuth watchers * Removed fixed title size values * Update instructions * Update instructions for env config * Update instructions with setup
This commit is contained in:
parent
42b92f7e23
commit
4a1a46facc
@ -18,5 +18,30 @@ services:
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
||||
blackbox:
|
||||
image: prom/blackbox-exporter:latest
|
||||
restart: always
|
||||
volumes:
|
||||
- ../config/monitoring/blackbox.yml:/etc/blackbox_exporter/config.yml
|
||||
ports:
|
||||
- '9115'
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
||||
chain-head-exporter:
|
||||
image: cerc/watcher-ts:local
|
||||
restart: always
|
||||
working_dir: /app/packages/cli
|
||||
environment:
|
||||
ETH_RPC_ENDPOINT: ${CERC_ETH_RPC_ENDPOINT}
|
||||
FIL_RPC_ENDPOINT: ${CERC_FIL_RPC_ENDPOINT}
|
||||
ETH_RPC_API_KEY: ${CERC_INFURA_KEY}
|
||||
PORT: ${CERC_METRICS_PORT}
|
||||
command: ["sh", "-c", "yarn export-metrics:chain-heads"]
|
||||
ports:
|
||||
- '5000'
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
||||
volumes:
|
||||
prometheus_data:
|
||||
|
7
stack_orchestrator/data/config/monitoring/blackbox.yml
Normal file
7
stack_orchestrator/data/config/monitoring/blackbox.yml
Normal file
@ -0,0 +1,7 @@
|
||||
modules:
|
||||
http_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
valid_status_codes: []  # empty list defaults to 2xx
|
||||
method: GET
|
@ -0,0 +1,943 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "node.js prometheus client basic metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 11159,
|
||||
"graphTooltip": 0,
|
||||
"id": 15,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 10,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "irate(process_cpu_user_seconds_total{instance=~\"$instance\"}[2m]) * 100",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "User CPU - {{instance}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "irate(process_cpu_system_seconds_total{instance=~\"$instance\"}[2m]) * 100",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Sys CPU - {{instance}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Process CPU Usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percent",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 9,
|
||||
"x": 10,
|
||||
"y": 0
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_eventloop_lag_seconds{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Event Loop Lag",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "text",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 19,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "",
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "name",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "10.2.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(nodejs_version_info{instance=~\"$instance\"}) by (version)",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{version}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Node.js Version",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "#F2495C",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 5,
|
||||
"x": 19,
|
||||
"y": 3
|
||||
},
|
||||
"id": 4,
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "10.2.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(changes(process_start_time_seconds{instance=~\"$instance\"}[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Process Restart Times",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 16,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "process_resident_memory_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Process Memory - {{instance}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_heap_size_total_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Heap Total - {{instance}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_heap_size_used_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Heap Used - {{instance}}",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_external_memory_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "External Memory - {{instance}}",
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Process Memory Usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 7
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_active_handles_total{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Active Handler - {{instance}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_active_requests_total{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Active Request - {{instance}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Active Handlers/Requests Total",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_heap_space_size_total_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Heap Total - {{instance}} - {{space}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Heap Total Detail",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 14
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_heap_space_size_used_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Heap Used - {{instance}} - {{space}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Heap Used Detail",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 14
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "nodejs_heap_space_size_available_bytes{instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Heap Available - {{instance}} - {{space}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Heap Available Detail",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 38,
|
||||
"tags": [
|
||||
"nodejs"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"definition": "label_values(nodejs_version_info, instance)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [],
|
||||
"query": "label_values(nodejs_version_info, instance)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-15m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "NodeJS Application Dashboard",
|
||||
"uid": "PTSqcpJWk",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -4,6 +4,7 @@ providers:
|
||||
- name: dashboards
|
||||
type: file
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /etc/grafana/dashboards
|
||||
foldersFromFilesStructure: true
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
scrape_interval: 10s
|
||||
evaluation_interval: 15s
|
||||
|
||||
rule_files:
|
||||
@ -10,3 +10,33 @@ scrape_configs:
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
- job_name: 'blackbox'
|
||||
scrape_interval: 10s
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx]
|
||||
static_configs:
|
||||
# Add URLs to be monitored below
|
||||
- targets:
|
||||
# - https://github.com
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
regex: (.*)(:80)?
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
regex: (.*)
|
||||
target_label: instance
|
||||
replacement: ${1}
|
||||
- source_labels: []
|
||||
regex: .*
|
||||
target_label: __address__
|
||||
replacement: blackbox:9115
|
||||
|
||||
- job_name: chain_heads
|
||||
scrape_interval: 10s
|
||||
metrics_path: /metrics
|
||||
scheme: http
|
||||
static_configs:
|
||||
- targets: ['chain-head-exporter:5000']
|
||||
|
||||
|
@ -84,8 +84,6 @@
|
||||
subgraphEventsOrder = true
|
||||
# Filecoin block time: https://docs.filecoin.io/basics/the-blockchain/blocks-and-tipsets#blocktime
|
||||
blockDelayInMilliSecs = 30000
|
||||
prefetchBlocksInMem = false
|
||||
prefetchBlockCount = 10
|
||||
|
||||
# Boolean to switch between modes of processing events when starting the server.
|
||||
# Setting to true will fetch filtered events and required blocks in a range of blocks and then process them.
|
||||
|
@ -84,8 +84,6 @@
|
||||
subgraphEventsOrder = true
|
||||
# Filecoin block time: https://docs.filecoin.io/basics/the-blockchain/blocks-and-tipsets#blocktime
|
||||
blockDelayInMilliSecs = 30000
|
||||
prefetchBlocksInMem = false
|
||||
prefetchBlockCount = 10
|
||||
|
||||
# Boolean to switch between modes of processing events when starting the server.
|
||||
# Setting to true will fetch filtered events and required blocks in a range of blocks and then process them.
|
||||
|
@ -40,20 +40,36 @@ network:
|
||||
ports:
|
||||
watcher-db:
|
||||
- 0.0.0.0:15432:5432
|
||||
azimuth-watcher-job-runner:
|
||||
- 0.0.0.0:9000:9000
|
||||
azimuth-watcher-server:
|
||||
- 0.0.0.0:3001:3001
|
||||
censures-watcher-job-runner:
|
||||
- 0.0.0.0:9002:9002
|
||||
censures-watcher-server:
|
||||
- 0.0.0.0:3002:3002
|
||||
claims-watcher-job-runner:
|
||||
- 0.0.0.0:9004:9004
|
||||
claims-watcher-server:
|
||||
- 0.0.0.0:3003:3003
|
||||
conditional-star-release-watcher-job-runner:
|
||||
- 0.0.0.0:9006:9006
|
||||
conditional-star-release-watcher-server:
|
||||
- 0.0.0.0:3004:3004
|
||||
delegated-sending-watcher-job-runner:
|
||||
- 0.0.0.0:9008:9008
|
||||
delegated-sending-watcher-server:
|
||||
- 0.0.0.0:3005:3005
|
||||
ecliptic-watcher-job-runner:
|
||||
- 0.0.0.0:9010:9010
|
||||
ecliptic-watcher-server:
|
||||
- 0.0.0.0:3006:3006
|
||||
linear-star-release-watcher-job-runner:
|
||||
- 0.0.0.0:9012:9012
|
||||
linear-star-release-watcher-server:
|
||||
- 0.0.0.0:3007:3007
|
||||
polls-watcher-job-runner:
|
||||
- 0.0.0.0:9014:9014
|
||||
polls-watcher-server:
|
||||
- 0.0.0.0:3008:3008
|
||||
gateway-server:
|
||||
|
@ -1,7 +1,7 @@
|
||||
version: "1.0"
|
||||
name: azimuth
|
||||
repos:
|
||||
- github.com/cerc-io/azimuth-watcher-ts@v0.1.2
|
||||
- github.com/cerc-io/azimuth-watcher-ts@v0.1.3
|
||||
containers:
|
||||
- cerc/watcher-azimuth
|
||||
pods:
|
||||
|
@ -1,8 +1,26 @@
|
||||
# monitoring
|
||||
|
||||
* Instructions to set up and run a Prometheus server and Grafana dashboard
|
||||
* Comes with the following built-in exporters / dashboards:
|
||||
* [Prometheus Blackbox Exporter](https://grafana.com/grafana/dashboards/7587-prometheus-blackbox-exporter/) - for tracking HTTP endpoints
|
||||
* [NodeJS Application Dashboard](https://grafana.com/grafana/dashboards/11159-nodejs-application-dashboard/) - for default NodeJS metrics
|
||||
* Chain Head Exporter - for tracking chain heads given external ETH RPC endpoints
|
||||
* See [monitoring-watchers.md](./monitoring-watchers.md) for an example usage of the stack with pre-configured dashboards for watchers
|
||||
|
||||
## Setup
|
||||
|
||||
Clone required repositories:
|
||||
|
||||
```bash
|
||||
laconic-so --stack monitoring setup-repositories --git-ssh --pull
|
||||
```
|
||||
|
||||
Build the container images:
|
||||
|
||||
```bash
|
||||
laconic-so --stack monitoring build-containers
|
||||
```
|
||||
|
||||
## Create a deployment
|
||||
|
||||
First, create a spec file for the deployment, which will map the stack's ports and volumes to the host:
|
||||
@ -43,7 +61,7 @@ laconic-so --stack monitoring deploy create --spec-file monitoring-spec.yml --de
|
||||
|
||||
### Prometheus Config
|
||||
|
||||
Add desired scrape configs to prometheus config file (`monitoring-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder; for example:
|
||||
* Add desired scrape configs to prometheus config file (`monitoring-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder; for example:
|
||||
|
||||
```yml
|
||||
...
|
||||
@ -54,12 +72,43 @@ Add desired scrape configs to prometheus config file (`monitoring-deployment/con
|
||||
- targets: ['<METRICS_ENDPOINT_HOST>:<METRICS_ENDPOINT_PORT>']
|
||||
```
|
||||
|
||||
* Also update the `blackbox` job to add any endpoints to be monitored on the Blackbox dashboard:
|
||||
|
||||
```yml
|
||||
...
|
||||
- job_name: 'blackbox'
|
||||
...
|
||||
static_configs:
|
||||
# Add URLs to be monitored below
|
||||
- targets:
|
||||
- <HTTP_ENDPOINT_1>
|
||||
- <HTTP_ENDPOINT_2>
|
||||
```
|
||||
|
||||
Note: Use `host.docker.internal` as host to access ports on the host machine
|
||||
|
||||
### Grafana Config
|
||||
|
||||
Place the dashboard JSON files in the Grafana dashboards config directory (`monitoring-deployment/config/monitoring/grafana/dashboards`) in the deployment folder
|
||||
|
||||
### Env
|
||||
|
||||
Set the following env variables in the deployment env config file (`monitoring-deployment/config.env`):
|
||||
|
||||
```bash
|
||||
# External ETH RPC endpoint (ethereum)
|
||||
# (Optional, default: https://mainnet.infura.io/v3)
|
||||
CERC_ETH_RPC_ENDPOINT=
|
||||
|
||||
# Infura key to be used
|
||||
# (Optional, used with CERC_ETH_RPC_ENDPOINT if provided)
|
||||
CERC_INFURA_KEY=
|
||||
|
||||
# External ETH RPC endpoint (filecoin)
|
||||
# (Optional, default: https://api.node.glif.io/rpc/v1)
|
||||
CERC_FIL_RPC_ENDPOINT=
|
||||
```
|
||||
|
||||
## Start the stack
|
||||
|
||||
Start the deployment:
|
||||
|
@ -4,7 +4,7 @@ Instructions to setup and run monitoring stack with pre-configured watcher dashb
|
||||
|
||||
## Create a deployment
|
||||
|
||||
First, create a spec file for the deployment, which will map the stack's ports and volumes to the host:
|
||||
After completing [setup](./README.md#setup), create a spec file for the deployment, which will map the stack's ports and volumes to the host:
|
||||
|
||||
```bash
|
||||
laconic-so --stack monitoring deploy init --output monitoring-watchers-spec.yml
|
||||
@ -41,7 +41,14 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
|
||||
|
||||
```yml
|
||||
...
|
||||
- job_name: 'blackbox'
|
||||
...
|
||||
static_configs:
|
||||
- targets:
|
||||
- <AZIMUTH_GATEWAY_GQL_ENDPOINT>
|
||||
...
|
||||
- job_name: azimuth
|
||||
scrape_interval: 10s
|
||||
metrics_path: /metrics
|
||||
scheme: http
|
||||
static_configs:
|
||||
@ -71,6 +78,7 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
|
||||
instance: 'polls'
|
||||
|
||||
- job_name: sushi
|
||||
scrape_interval: 20s
|
||||
metrics_path: /metrics
|
||||
scheme: http
|
||||
static_configs:
|
||||
@ -90,6 +98,15 @@ In the deployment folder, copy over the pre-configured watcher dashboard JSON fi
|
||||
cp -r monitoring-watchers-deployment/config/monitoring/grafana/watcher-dashboards/* monitoring-watchers-deployment/config/monitoring/grafana/dashboards/
|
||||
```
|
||||
|
||||
### Env
|
||||
|
||||
Set the following env variables in the deployment env config file (`monitoring-watchers-deployment/config.env`):
|
||||
|
||||
```bash
|
||||
# Infura key to be used
|
||||
CERC_INFURA_KEY=
|
||||
```
|
||||
|
||||
## Start the stack
|
||||
|
||||
Start the deployment:
|
||||
|
@ -1,7 +1,9 @@
|
||||
version: "0.1"
|
||||
name: monitoring
|
||||
repos:
|
||||
- github.com/cerc-io/watcher-ts@v0.2.79
|
||||
containers:
|
||||
- cerc/watcher-ts
|
||||
pods:
|
||||
- prom-server
|
||||
- grafana
|
||||
|
@ -2,7 +2,7 @@ version: "1.0"
|
||||
name: sushiswap-v3
|
||||
description: "SushiSwap v3 watcher stack"
|
||||
repos:
|
||||
- github.com/cerc-io/sushiswap-v3-watcher-ts@v0.1.4
|
||||
- github.com/cerc-io/sushiswap-v3-watcher-ts@v0.1.5
|
||||
containers:
|
||||
- cerc/watcher-sushiswap-v3
|
||||
pods:
|
||||
|
Loading…
Reference in New Issue
Block a user