forked from cerc-io/stack-orchestrator
Compare commits
No commits in common. "ng-monitoring-testnet" and "main" have entirely different histories.

@@ -1,8 +1,7 @@
 modules:
   http_2xx:
     prober: http
-    timeout: 15s
+    timeout: 5s
     http:
       valid_status_codes: [] #default to 2xx
       method: GET
       preferred_ip_protocol: ip4
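
For reference, this module can be exercised by hand once the exporter is up; a minimal sketch, assuming the blackbox exporter is reachable on localhost:9115 (the address used in the scrape config further down) and borrowing the https://github.com example target:

```bash
# ask the blackbox exporter to run the http_2xx module against a target;
# probe_success 1 in the output means the check passed
curl 'http://localhost:9115/probe?module=http_2xx&target=https://github.com'
```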

@@ -133,13 +133,10 @@
         "type": "prometheus",
         "uid": "PBFA97CFB590B2093"
       },
       "expr": "probe_success{instance=~\"$target\"}",
       "format": "time_series",
       "instant": true,
       "refId": "A"
     }
   ],
-  "title": "$target ($url)",
+  "title": "$target status",
   "type": "row"
 },
 {

@@ -1060,29 +1057,6 @@
       "tagsQuery": "",
       "type": "query",
       "useTags": false
     },
-    {
-      "current": {
-        "selected": false,
-        "text": "",
-        "value": ""
-      },
-      "datasource": {
-        "type": "prometheus",
-        "uid": "PBFA97CFB590B2093"
-      },
-      "definition": "label_values(probe_success{instance=~\"$target\"}, url)",
-      "hide": 2,
-      "includeAll": false,
-      "multi": false,
-      "name": "url",
-      "options": [],
-      "query": "label_values(probe_success{instance=~\"$target\"}, url)",
-      "refresh": 2,
-      "regex": "",
-      "skipUrlSync": false,
-      "sort": 0,
-      "type": "query"
-    }
   ]
 },

@@ -8,7 +8,6 @@ policies:
   group_by:
     - grafana_folder
     - alertname
-    - instance
   routes:
     - receiver: SlackNotifier
       object_matchers:

@@ -25,34 +25,20 @@ scrape_configs:
     module: [http_2xx]
     static_configs:
-     # Add URLs to be monitored below
-     # - targets: ["https://github.com"]
-     #   labels:
-     #     alias: "GitHub"
-     #     url: "https://github.com"
+     - targets:
+       # - https://github.com
     relabel_configs:
      # Forward the original target URL as the 'target' parameter.
      - source_labels: [__address__]
-       regex: (.*)(:80)?
        target_label: __param_target
-     # Use the custom alias if defined for the 'instance' label.
-     - source_labels: [alias]
+     - source_labels: [__param_target]
        regex: (.*)
        target_label: instance
-       action: replace
-     # Preserve the URL label
-     - source_labels: [url]
-       target_label: url
-       action: replace
-     # If no alias is set, fall back to the target URL.
-     - source_labels: [instance]
-       regex: ^$
-       target_label: instance
-       replacement: ${__param_target}
      # Finally, tell Prometheus to scrape the blackbox_exporter.
      - target_label: __address__
        replacement: ${1}
+     - source_labels: []
+       regex: .*
+       target_label: __address__
+       replacement: blackbox:9115
-     # Drop the original alias label as it's now redundant with instance
-     - action: labeldrop
-       regex: ^alias$

   - job_name: chain_heads
     scrape_interval: 10s

@@ -1,64 +0,0 @@
apiVersion: 1

groups:
  - orgId: 1
    name: testnet
    folder: TestnetAlerts
    interval: 30s
    rules:
      - uid: endpoint_down
        title: endpoint_down
        condition: condition
        data:
          - refId: probe_success
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PBFA97CFB590B2093
            model:
              datasource:
                type: prometheus
                uid: PBFA97CFB590B2093
              editorMode: code
              expr: probe_success{job="blackbox"}
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: probe_success
          - refId: condition
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: eq
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: ${probe_success} == 0
              intervalMs: 1000
              maxDataPoints: 43200
              refId: condition
              type: math
        noDataState: Alerting
        execErrState: Alerting
        for: 5m
        annotations:
          summary: Endpoint {{ $labels.instance }} is down
        isPaused: false

@@ -1,170 +0,0 @@
# Monitoring Testnet

Instructions to set up and run a monitoring stack for testnet services

## Create a deployment

Create a spec file for the deployment, which will map the stack's ports and volumes to the host:

```bash
laconic-so --stack monitoring deploy init --output monitoring-testnet-spec.yml
```

### Ports

Edit `network` in the spec file to map container ports to the same ports on the host:

```yml
...
network:
  ports:
    prometheus:
      - '9090:9090'
    grafana:
      - '3000:3000'
...
```

---

Once you've made any needed changes to the spec file, create a deployment from it:

```bash
laconic-so --stack monitoring deploy create --spec-file monitoring-testnet-spec.yml --deployment-dir monitoring-testnet-deployment
```

## Configure

### Prometheus scrape config

- Set up the following scrape configs in the Prometheus config file (`monitoring-testnet-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder:

```yml
...
- job_name: 'blackbox'
  ...
  static_configs:
    - targets: ["https://wallet.laconic.com"]
      labels:
        alias: "Wallet App"
        url: "https://wallet.laconic.com"
    - targets: ["https://laconicd-sapo.laconic.com"]
      labels:
        alias: "Node laconicd"
        url: "https://laconicd-sapo.laconic.com"
    - targets: ["https://console-sapo.laconic.com"]
      labels:
        alias: "Console App"
        url: "https://console-sapo.laconic.com"
    - targets: ["https://fixturenet-eth.laconic.com"]
      labels:
        alias: "Fixturenet ETH"
        url: "https://fixturenet-eth.laconic.com"
    - targets: ["https://deploy.laconic.com"]
      labels:
        alias: "Deploy App"
        url: "https://deploy.laconic.com"
    - targets: ["https://deploy-backend.laconic.com/staging/version"]
      labels:
        alias: "Deploy Backend"
        url: "https://deploy-backend.laconic.com/staging/version"
    - targets: ["https://container-registry.apps.vaasl.io"]
      labels:
        alias: "Container Registry"
        url: "https://container-registry.apps.vaasl.io"
    - targets: ["https://webapp-deployer-api.apps.vaasl.io"]
      labels:
        alias: "Webapp Deployer API"
        url: "https://webapp-deployer-api.apps.vaasl.io"
    - targets: ["https://webapp-deployer-ui.apps.vaasl.io"]
      labels:
        alias: "Webapp Deployer UI"
        url: "https://webapp-deployer-ui.apps.vaasl.io"
...
- job_name: laconicd
  ...
  static_configs:
    - targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
      # Example: 'host.docker.internal:3317'
```
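
After editing, the scrape config can be sanity-checked before (re)starting the stack; a quick sketch using `promtool`, which ships with Prometheus, assuming it is available on the host:

```bash
# validate the edited Prometheus config; exits non-zero on errors
promtool check config monitoring-testnet-deployment/config/monitoring/prometheus/prometheus.yml
```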

- Remove the docker compose services that are not required from `monitoring-testnet-deployment/compose/docker-compose-prom-server.yml` (a `yq` one-liner is sketched below):
  - `ethereum-chain-head-exporter`
  - `filecoin-chain-head-exporter`
  - `graph-node-upstream-head-exporter`
  - `postgres-exporter`
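
A minimal sketch for dropping those services, assuming `yq` v4 is installed; editing the compose file by hand works just as well:

```bash
# delete the unneeded exporter services from the compose file in place
yq -i '
  del(.services["ethereum-chain-head-exporter"]) |
  del(.services["filecoin-chain-head-exporter"]) |
  del(.services["graph-node-upstream-head-exporter"]) |
  del(.services["postgres-exporter"])
' monitoring-testnet-deployment/compose/docker-compose-prom-server.yml
```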

### Grafana dashboards

Remove the existing dashboards that are not required for monitoring the testnet:

```bash
cd monitoring-testnet-deployment/config/monitoring/grafana/dashboards
rm postgres-dashboard.json subgraphs-dashboard.json watcher-dashboard.json
cd -
```

<!-- TODO: Check node-exporter-full.json, nodejs-app-dashboard.json -->

### Grafana alerts config

Place the pre-configured alert rules in the Grafana provisioning directory:

```bash
# testnet alert rules
cp monitoring-testnet-deployment/config/monitoring/testnet-alert-rules.yml monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/
```

Update the alerting contact points config (`monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with the desired contact points.
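
A minimal sketch of a `contactpoints.yml` entry for the `SlackNotifier` receiver referenced below, assuming Grafana's file-provisioning format for alerting and a Slack incoming-webhook URL of your own:

```yml
apiVersion: 1

contactPoints:
  - orgId: 1
    name: SlackNotifier
    receivers:
      - uid: slack-notifier  # any stable, unique uid
        type: slack
        settings:
          # hypothetical webhook; replace with your own Slack webhook URL
          url: https://hooks.slack.com/services/XXX/YYY/ZZZ
```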

Add corresponding routes to the notification policies config (`monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/policies.yml`) with appropriate object-matchers:

```yml
...
routes:
  - receiver: SlackNotifier
    object_matchers:
      # Add matchers below
      - ['grafana_folder', '=~', 'TestnetAlerts']
```

### Env

Set the following env variables in the deployment env config file (`monitoring-testnet-deployment/config.env`):

```bash
# Grafana server host URL to be used
# (Optional, default: http://localhost:3000)
GF_SERVER_ROOT_URL=
```

## Start the stack

Start the deployment:

```bash
laconic-so deployment --dir monitoring-testnet-deployment start
```

* List and check the health status of all the containers using `docker ps` and wait for them to be `healthy` (a convenience sketch follows this list)

* Grafana should now be visible at http://localhost:3000 with the configured dashboards
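
A minimal convenience sketch for watching container health from the host, assuming the stock `docker` CLI and the `watch` utility:

```bash
# refresh a name/status table every 2s; wait until every container
# in the deployment reports (healthy)
watch "docker ps --format 'table {{.Names}}\t{{.Status}}'"
```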

## Clean up

To stop the monitoring services running in the background while preserving data:

```bash
# Only stop the docker containers
laconic-so deployment --dir monitoring-testnet-deployment stop

# Run 'start' to restart the deployment
```

To stop the monitoring services and also delete data:

```bash
# Stop the docker containers
laconic-so deployment --dir monitoring-testnet-deployment stop --delete-volumes

# Remove deployment directory (deployment will have to be recreated for a re-run)
rm -rf monitoring-testnet-deployment
```

@@ -44,12 +44,9 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
   - job_name: 'blackbox'
     ...
     static_configs:
-      - targets: ["<AZIMUTH_GATEWAY_GQL_ENDPOINT>"]
-        labels:
-          alias: "Azimuth Watcher"
-      - targets: ["<LACONICD_GQL_ENDPOINT>"]
-        labels:
-          alias: "Node (laconicd)"
+      - targets:
+          - <AZIMUTH_GATEWAY_GQL_ENDPOINT>
+          - <LACONICD_GQL_ENDPOINT>
     ...
   - job_name: laconicd
     static_configs: