Upgrade ajna and sushi-v3 watchers to switch RPC endpoint on slow eth_getLogs request #862

Open
nabarun wants to merge 6 commits from deep-stack/stack-orchestrator:ng-watchers-switch-slow-logs into main
7 changed files with 219 additions and 194 deletions

View File

@ -1745,7 +1745,7 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Total number of failed ETH RPC requests by method and provider endpoint ",
"description": "Total number of ETH RPC requests by method",
"fieldConfig": {
"defaults": {
"color": {
@ -1798,7 +1798,7 @@
},
"gridPos": {
"h": 6,
"w": 12,
"w": 8,
"x": 0,
"y": 21
},
@ -1825,7 +1825,7 @@
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "watcher_eth_rpc_errors{job=~\"$job\", instance=~\"$watcher\"}",
"expr": "sum by(method) (watcher_eth_rpc_total{job=~\"$job\", instance=~\"$watcher\"})",
"format": "time_series",
"fullMetaSearch": false,
"includeNullMetadata": true,
@ -1836,7 +1836,7 @@
"useBackend": false
}
],
"title": "ETH RPC request failures",
"title": "ETH RPC requests total",
"type": "timeseries"
},
{
@ -1901,8 +1901,8 @@
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"w": 8,
"x": 8,
"y": 21
},
"id": 26,
@ -1935,105 +1935,6 @@
"title": "ETH RPC request durations",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Total number of failed ETH RPC requests by method and provider endpoint (across all watchers)",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 27
},
"id": 30,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "sum by (method, provider) (watcher_eth_rpc_errors{chain=\"$target_chain\"})",
"format": "time_series",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{method}}, {{provider}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Total ETH RPC request failures (across all watchers)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
@ -2100,9 +2001,9 @@
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 27
"w": 8,
"x": 16,
"y": 21
},
"id": 32,
"options": {
@ -2170,6 +2071,204 @@
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Total number of failed ETH RPC requests by method and provider endpoint",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 27
},
"id": 39,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "watcher_eth_rpc_errors{job=~\"$job\", instance=~\"$watcher\"}",
"format": "time_series",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{method}}, {{provider}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "ETH RPC request failures",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Total number of failed ETH RPC requests by method and provider endpoint (across all watchers)",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 27
},
"id": 30,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "sum by (method, provider) (watcher_eth_rpc_errors{chain=\"$target_chain\"})",
"format": "time_series",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{method}}, {{provider}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Total ETH RPC request failures (across all watchers)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",

View File

@ -695,82 +695,6 @@ groups:
annotations:
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
isPaused: false
- uid: merkl_sushiswap_diff_external
title: merkl_sushiswap_watcher_head_tracking
condition: condition
data:
- refId: diff
relativeTimeRange:
from: 600
to: 0
datasourceUid: PBFA97CFB590B2093
model:
datasource:
type: prometheus
uid: PBFA97CFB590B2093
disableTextWrap: false
editorMode: code
expr: latest_block_number{instance="external"} - on(chain) group_right sync_status_block_number{job="sushi", instance="merkl_sushiswap", kind="latest_indexed"}
fullMetaSearch: false
includeNullMetadata: true
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: diff
useBackend: false
- refId: latest_external
relativeTimeRange:
from: 600
to: 0
datasourceUid: PBFA97CFB590B2093
model:
datasource:
type: prometheus
uid: PBFA97CFB590B2093
editorMode: code
expr: latest_block_number{chain="filecoin"}
hide: false
instant: true
legendFormat: __auto
range: false
refId: latest_external
- refId: condition
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 0
- 0
type: gt
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: ${diff} >= 16
intervalMs: 1000
maxDataPoints: 43200
refId: condition
type: math
noDataState: Alerting
execErrState: Alerting
for: 15m
annotations:
summary: Watcher {{ index $labels "instance" }} of group {{ index $labels "job" }} is falling behind external head by {{ index $values "diff" }}
isPaused: false
# Ajna
- uid: ajna_diff_external

View File

@ -10,7 +10,6 @@
checkpointInterval = 2000
# Enable state creation
# CAUTION: Disable only if state creation is not desired or can be filled subsequently
enableState = false
subgraphPath = "./subgraph-build"
@ -77,6 +76,10 @@
# Boolean flag to filter event logs by topics
filterLogsByTopics = true
# Switch to another RPC client if an eth_getLogs call takes longer than this threshold (in secs)
# Set to 0 to disable switching
getLogsClientSwitchThresholdInSecs = 30
[upstream.cache]
name = "requests"
enabled = false
@ -92,7 +95,7 @@
blockDelayInMilliSecs = 30000
# Number of blocks by which block processing lags behind head
blockProcessingOffset = 0
blockProcessingOffset = 3
# Boolean to switch between modes of processing events when starting the server.
# Setting to true will fetch filtered events and required blocks in a range of blocks and then process them.

View File

@ -10,7 +10,6 @@
checkpointInterval = 2000
# Enable state creation
# CAUTION: Disable only if state creation is not desired or can be filled subsequently
enableState = false
subgraphPath = "./subgraph-build"
@ -77,6 +76,10 @@
# Boolean flag to filter event logs by topics
filterLogsByTopics = true
# Switch to another RPC client if an eth_getLogs call takes longer than this threshold (in secs)
# Set to 0 to disable switching
getLogsClientSwitchThresholdInSecs = 30
[upstream.cache]
name = "requests"
enabled = false
@ -92,7 +95,7 @@
blockDelayInMilliSecs = 30000
# Number of blocks by which block processing lags behind head
blockProcessingOffset = 0
blockProcessingOffset = 3
# Boolean to switch between modes of processing events when starting the server.
# Setting to true will fetch filtered events and required blocks in a range of blocks and then process them.

View File

@ -2,7 +2,7 @@ version: "1.0"
name: ajna
description: "Ajna watcher stack"
repos:
- git.vdb.to/cerc-io/ajna-watcher-ts@v0.1.13
- git.vdb.to/cerc-io/ajna-watcher-ts@v0.1.16
containers:
- cerc/watcher-ajna
pods:

View File

@ -99,10 +99,6 @@ Add the following scrape configs to prometheus config file (`monitoring-watchers
labels:
instance: 'sushiswap'
chain: 'filecoin'
- targets: ['MERKLE_SUSHISWAP_WATCHER_HOST:MERKLE_SUSHISWAP_WATCHER_METRICS_PORT', 'MERKLE_SUSHISWAP_WATCHER_HOST:MERKLE_SUSHISWAP_WATCHER_GQL_METRICS_PORT']
labels:
instance: 'merkl_sushiswap'
chain: 'filecoin'
- job_name: ajna
scrape_interval: 20s

View File

@ -2,7 +2,7 @@ version: "1.0"
name: sushiswap-v3
description: "SushiSwap v3 watcher stack"
repos:
- github.com/cerc-io/sushiswap-v3-watcher-ts@v0.1.14
- github.com/cerc-io/sushiswap-v3-watcher-ts@v0.1.16
containers:
- cerc/watcher-sushiswap-v3
pods: