Compare commits

..

No commits in common. "ng-monitoring-testnet" and "main" have entirely different histories.

7 changed files with 13 additions and 292 deletions

View File

@ -1,8 +1,7 @@
modules:
http_2xx:
prober: http
timeout: 15s
timeout: 5s
http:
valid_status_codes: [] #default to 2xx
method: GET
preferred_ip_protocol: ip4

View File

@ -133,13 +133,10 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"expr": "probe_success{instance=~\"$target\"}",
"format": "time_series",
"instant": true,
"refId": "A"
}
],
"title": "$target ($url)",
"title": "$target status",
"type": "row"
},
{
@ -1060,29 +1057,6 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "label_values(probe_success{instance=~\"$target\"}, url)",
"hide": 2,
"includeAll": false,
"multi": false,
"name": "url",
"options": [],
"query": "label_values(probe_success{instance=~\"$target\"}, url)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},

View File

@ -8,7 +8,6 @@ policies:
group_by:
- grafana_folder
- alertname
- instance
routes:
- receiver: SlackNotifier
object_matchers:

View File

@ -25,34 +25,20 @@ scrape_configs:
module: [http_2xx]
static_configs:
# Add URLs to be monitored below
# - targets: ["https://github.com"]
# labels:
# alias: "GitHub"
# url: "https://github.com"
- targets:
# - https://github.com
relabel_configs:
# Forward the original target URL as the 'target' parameter.
- source_labels: [__address__]
regex: (.*)(:80)?
target_label: __param_target
# Use the custom alias if defined for the 'instance' label.
- source_labels: [alias]
- source_labels: [__param_target]
regex: (.*)
target_label: instance
action: replace
# Preserve the URL label
- source_labels: [url]
target_label: url
action: replace
# If no alias is set, fall back to the target URL.
- source_labels: [instance]
regex: ^$
target_label: instance
replacement: ${__param_target}
# Finally, tell Prometheus to scrape the blackbox_exporter.
- target_label: __address__
replacement: ${1}
- source_labels: []
regex: .*
target_label: __address__
replacement: blackbox:9115
# Drop the original alias label as it's now redundant with instance
- action: labeldrop
regex: ^alias$
- job_name: chain_heads
scrape_interval: 10s

View File

@ -1,64 +0,0 @@
apiVersion: 1
groups:
- orgId: 1
name: testnet
folder: TestnetAlerts
interval: 30s
rules:
- uid: endpoint_down
title: endpoint_down
condition: condition
data:
- refId: probe_success
relativeTimeRange:
from: 600
to: 0
datasourceUid: PBFA97CFB590B2093
model:
datasource:
type: prometheus
uid: PBFA97CFB590B2093
editorMode: code
expr: probe_success{job="blackbox"}
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: probe_success
- refId: condition
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 0
- 0
type: eq
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: ${probe_success} == 0
intervalMs: 1000
maxDataPoints: 43200
refId: condition
type: math
noDataState: Alerting
execErrState: Alerting
for: 5m
annotations:
summary: Endpoint {{ $labels.instance }} is down
isPaused: false

View File

@ -1,170 +0,0 @@
# Monitoring Testnet
Instructions to setup and run monitoring stack for testnet services
## Create a deployment
Create a spec file for the deployment, which will map the stack's ports and volumes to the host:
```bash
laconic-so --stack monitoring deploy init --output monitoring-testnet-spec.yml
```
### Ports
Edit `network` in spec file to map container ports to same ports in host:
```
...
network:
ports:
prometheus:
- '9090:9090'
grafana:
- '3000:3000'
...
```
---
Once you've made any needed changes to the spec file, create a deployment from it:
```bash
laconic-so --stack monitoring deploy create --spec-file monitoring-testnet-spec.yml --deployment-dir monitoring-testnet-deployment
```
## Configure
### Prometheus scrape config
- Setup the following scrape configs in prometheus config file (`monitoring-testnet-deployment/config/monitoring/prometheus/prometheus.yml`) in the deployment folder:
```yml
...
- job_name: 'blackbox'
...
static_configs:
- targets: ["https://wallet.laconic.com"]
labels:
alias: "Wallet App"
url: "https://wallet.laconic.com"
- targets: ["https://laconicd-sapo.laconic.com"]
labels:
alias: "Node laconicd"
url: "https://laconicd-sapo.laconic.com"
- targets: ["https://console-sapo.laconic.com"]
labels:
alias: "Console App"
url: "https://console-sapo.laconic.com"
- targets: ["https://fixturenet-eth.laconic.com"]
labels:
alias: "Fixturenet ETH"
url: "https://fixturenet-eth.laconic.com"
- targets: ["https://deploy.laconic.com"]
labels:
alias: "Deploy App"
url: "https://deploy.laconic.com"
- targets: ["https://deploy-backend.laconic.com/staging/version"]
labels:
alias: "Deploy Backend"
url: "https://deploy-backend.laconic.com/staging/version"
- targets: ["https://container-registry.apps.vaasl.io"]
labels:
alias: "Container Registry"
url: "https://container-registry.apps.vaasl.io"
- targets: ["https://webapp-deployer-api.apps.vaasl.io"]
labels:
alias: "Webapp Deployer API"
url: "https://webapp-deployer-api.apps.vaasl.io"
- targets: ["https://webapp-deployer-ui.apps.vaasl.io"]
labels:
alias: "Webapp Deployer UI"
url: "https://webapp-deployer-ui.apps.vaasl.io"
...
- job_name: laconicd
...
static_configs:
- targets: ['LACONICD_REST_HOST:LACONICD_REST_PORT']
# Example: 'host.docker.internal:3317'
```
- Remove docker compose services which are not required in `monitoring-testnet-deployment/compose/docker-compose-prom-server.yml`
- `ethereum-chain-head-exporter`
- `filecoin-chain-head-exporter`
- `graph-node-upstream-head-exporter`
- `postgres-exporter`
### Grafana dashboards
Remove some of the existing dashboards which are not required in monitoring testnet
```
cd monitoring-testnet-deployment/config/monitoring/grafana/dashboards
rm postgres-dashboard.json subgraphs-dashboard.json watcher-dashboard.json
cd -
```
<!-- TODO: Check node-exporter-full.json, nodejs-app-dashboard.json -->
### Grafana alerts config
Place the pre-configured alerts rules in Grafana provisioning directory:
```bash
# testnet alert rules
cp monitoring-testnet-deployment/config/monitoring/testnet-alert-rules.yml monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/
```
Update the alerting contact points config (`monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/contactpoints.yml`) with the desired contact points.
Add corresponding routes to the notification policies config (`monitoring-testnet-deployment/config/monitoring/grafana/provisioning/alerting/policies.yml`) with appropriate object-matchers:
```yml
...
routes:
- receiver: SlackNotifier
object_matchers:
# Add matchers below
- ['grafana_folder', '=~', 'TestnetAlerts']
```
### Env
Set the following env variables in the deployment env config file (`monitoring-testnet-deployment/config.env`):
```bash
# Grafana server host URL to be used
# (Optional, default: http://localhost:3000)
GF_SERVER_ROOT_URL=
```
## Start the stack
Start the deployment:
```bash
laconic-so deployment --dir monitoring-testnet-deployment start
```
* List and check the health status of all the containers using `docker ps` and wait for them to be `healthy`
* Grafana should now be visible at http://localhost:3000 with configured dashboards
## Clean up
To stop monitoring services running in the background, while preserving data:
```bash
# Only stop the docker containers
laconic-so deployment --dir monitoring-testnet-deployment stop
# Run 'start' to restart the deployment
```
To stop monitoring services and also delete data:
```bash
# Stop the docker containers
laconic-so deployment --dir monitoring-testnet-deployment stop --delete-volumes
# Remove deployment directory (deployment will have to be recreated for a re-run)
rm -rf monitoring-testnet-deployment
```

View File

@ -44,12 +44,9 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
- job_name: 'blackbox'
...
static_configs:
- targets: ["<AZIMUTH_GATEWAY_GQL_ENDPOINT>"]
labels:
alias: "Azimuth Watcher"
- targets: ["<LACONICD_GQL_ENDPOINT>"]
labels:
alias: "Node (laconicd)"
- targets:
- <AZIMUTH_GATEWAY_GQL_ENDPOINT>
- <LACONICD_GQL_ENDPOINT>
...
- job_name: laconicd
static_configs: