Return more detail in Eth1 HTTP errors (#2383)
## Issue Addressed NA ## Proposed Changes Whilst investigating #2372, I [learned](https://github.com/sigp/lighthouse/issues/2372#issuecomment-851725049) that the error messages returned from some failed Eth1 requests are always `NotReachable`. This makes debugging quite painful. This PR adds more detail to these errors. For example: - Bad Infura key: `ERRO Failed to update eth1 cache error: Failed to update Eth1 service: "All fallback errored: https://mainnet.infura.io/ => EndpointError(RequestFailed(\"Response HTTP status was not 200 OK: 401 Unauthorized.\"))", retry_millis: 60000, service: eth1_rpc` - Unreachable server: `ERRO Failed to update eth1 cache error: Failed to update Eth1 service: "All fallback errored: http://127.0.0.1:8545/ => EndpointError(RequestFailed(\"Request failed: reqwest::Error { kind: Request, url: Url { scheme: \\\"http\\\", cannot_be_a_base: false, username: \\\"\\\", password: None, host: Some(Ipv4(127.0.0.1)), port: Some(8545), path: \\\"/\\\", query: None, fragment: None }, source: hyper::Error(Connect, ConnectError(\\\"tcp connect error\\\", Os { code: 111, kind: ConnectionRefused, message: \\\"Connection refused\\\" })) }\"))", retry_millis: 60000, service: eth1_rpc` - Bad server: `ERRO Failed to update eth1 cache error: Failed to update Eth1 service: "All fallback errored: http://127.0.0.1:8545/ => EndpointError(RequestFailed(\"Response HTTP status was not 200 OK: 501 Not Implemented.\"))", retry_millis: 60000, service: eth1_rpc` ## Additional Info NA
This commit is contained in:
parent
4c7bb4984c
commit
ba9c4c5eea
@ -44,9 +44,9 @@ const WARNING_MSG: &str = "BLOCK PROPOSALS WILL FAIL WITHOUT VALID, SYNCED ETH1
|
|||||||
/// A factor used to reduce the eth1 follow distance to account for discrepancies in the block time.
|
/// A factor used to reduce the eth1 follow distance to account for discrepancies in the block time.
|
||||||
const ETH1_BLOCK_TIME_TOLERANCE_FACTOR: u64 = 4;
|
const ETH1_BLOCK_TIME_TOLERANCE_FACTOR: u64 = 4;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
pub enum EndpointError {
|
pub enum EndpointError {
|
||||||
NotReachable,
|
RequestFailed(String),
|
||||||
WrongNetworkId,
|
WrongNetworkId,
|
||||||
WrongChainId,
|
WrongChainId,
|
||||||
FarBehind,
|
FarBehind,
|
||||||
@ -73,7 +73,7 @@ async fn reset_endpoint_state(endpoint: &EndpointWithState) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn get_state(endpoint: &EndpointWithState) -> Option<EndpointState> {
|
async fn get_state(endpoint: &EndpointWithState) -> Option<EndpointState> {
|
||||||
*endpoint.state.read().await
|
endpoint.state.read().await.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A cache structure to lazily check usability of endpoints. An endpoint is usable if it is
|
/// A cache structure to lazily check usability of endpoints. An endpoint is usable if it is
|
||||||
@ -90,11 +90,11 @@ impl EndpointsCache {
|
|||||||
/// Checks the usability of an endpoint. Results get cached and therefore only the first call
|
/// Checks the usability of an endpoint. Results get cached and therefore only the first call
|
||||||
/// for each endpoint does the real check.
|
/// for each endpoint does the real check.
|
||||||
async fn state(&self, endpoint: &EndpointWithState) -> EndpointState {
|
async fn state(&self, endpoint: &EndpointWithState) -> EndpointState {
|
||||||
if let Some(result) = *endpoint.state.read().await {
|
if let Some(result) = endpoint.state.read().await.clone() {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
let mut value = endpoint.state.write().await;
|
let mut value = endpoint.state.write().await;
|
||||||
if let Some(result) = *value {
|
if let Some(result) = value.clone() {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
crate::metrics::inc_counter_vec(
|
crate::metrics::inc_counter_vec(
|
||||||
@ -108,7 +108,7 @@ impl EndpointsCache {
|
|||||||
&self.log,
|
&self.log,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
*value = Some(state);
|
*value = Some(state.clone());
|
||||||
if state.is_err() {
|
if state.is_err() {
|
||||||
crate::metrics::inc_counter_vec(
|
crate::metrics::inc_counter_vec(
|
||||||
&crate::metrics::ENDPOINT_ERRORS,
|
&crate::metrics::ENDPOINT_ERRORS,
|
||||||
@ -147,7 +147,7 @@ impl EndpointsCache {
|
|||||||
&[endpoint_str],
|
&[endpoint_str],
|
||||||
);
|
);
|
||||||
if let SingleEndpointError::EndpointError(e) = &t {
|
if let SingleEndpointError::EndpointError(e) = &t {
|
||||||
*endpoint.state.write().await = Some(Err(*e));
|
*endpoint.state.write().await = Some(Err(e.clone()));
|
||||||
} else {
|
} else {
|
||||||
// A non-`EndpointError` error occurred, so reset the state.
|
// A non-`EndpointError` error occurred, so reset the state.
|
||||||
reset_endpoint_state(endpoint).await;
|
reset_endpoint_state(endpoint).await;
|
||||||
@ -181,14 +181,14 @@ async fn endpoint_state(
|
|||||||
config_chain_id: &Eth1Id,
|
config_chain_id: &Eth1Id,
|
||||||
log: &Logger,
|
log: &Logger,
|
||||||
) -> EndpointState {
|
) -> EndpointState {
|
||||||
let error_connecting = |_| {
|
let error_connecting = |e| {
|
||||||
warn!(
|
warn!(
|
||||||
log,
|
log,
|
||||||
"Error connecting to eth1 node endpoint";
|
"Error connecting to eth1 node endpoint";
|
||||||
"endpoint" => %endpoint,
|
"endpoint" => %endpoint,
|
||||||
"action" => "trying fallbacks"
|
"action" => "trying fallbacks"
|
||||||
);
|
);
|
||||||
EndpointError::NotReachable
|
EndpointError::RequestFailed(e)
|
||||||
};
|
};
|
||||||
let network_id = get_network_id(endpoint, Duration::from_millis(STANDARD_TIMEOUT_MILLIS))
|
let network_id = get_network_id(endpoint, Duration::from_millis(STANDARD_TIMEOUT_MILLIS))
|
||||||
.await
|
.await
|
||||||
|
Loading…
Reference in New Issue
Block a user