Improve transport connection errors (#4540)

## Issue Addressed

#4538 

## Proposed Changes

Add a newtype wrapper around `DialError` that extracts the error messages and logs them in a more readable format.

## Additional Info

I was able to test Transport Dial Errors in the situation where a libp2p instance attempts to ping a nonexistent peer. That error message should look something like

`A transport level error has ocurred: Connection refused (os error 61)`

AgeManning mentioned we should try fetching only the innermost error (in situations where there's a nested error). I took a stab at implementing that.

For non-transport DialErrors, I wrote out the error messages explicitly (as per the docs). This could potentially be cleaned up if that's not necessary.


Co-authored-by: Age Manning <Age@AgeManning.com>
This commit is contained in:
Eitan Seri-Levi 2023-08-10 00:10:09 +00:00
parent b60304b19f
commit 1fcada8a32
3 changed files with 45 additions and 4 deletions

View File

@ -7,9 +7,9 @@ pub(crate) mod enr;
pub mod enr_ext;
// Allow external use of the lighthouse ENR builder
use crate::metrics;
use crate::service::TARGET_SUBNET_PEERS;
use crate::{error, Enr, NetworkConfig, NetworkGlobals, Subnet, SubnetDiscovery};
use crate::{metrics, ClearDialError};
use discv5::{enr::NodeId, Discv5, Discv5Event};
pub use enr::{
build_enr, create_enr_builder_from_config, load_enr_from_disk, use_or_load_enr, CombinedKey,
@ -1111,7 +1111,7 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
| DialError::Transport(_)
| DialError::WrongPeerId { .. } => {
// set peer as disconnected in discovery DHT
debug!(self.log, "Marking peer disconnected in DHT"; "peer_id" => %peer_id);
debug!(self.log, "Marking peer disconnected in DHT"; "peer_id" => %peer_id, "error" => %ClearDialError(error));
self.disconnect_peer(&peer_id);
}
DialError::DialPeerConditionFalse(_) | DialError::Aborted => {}

View File

@ -17,6 +17,7 @@ pub mod rpc;
pub mod types;
pub use config::gossip_max_size;
use libp2p::swarm::DialError;
pub use listen_addr::*;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
@ -63,6 +64,46 @@ impl<'de> Deserialize<'de> for PeerIdSerialized {
}
}
/// A wrapper struct that prints a dial error nicely.
///
/// Borrows a `DialError` and exists so that the `Display` implementation below can
/// render a short, readable message for log output instead of the error's default form.
struct ClearDialError<'a>(&'a DialError);
impl<'a> ClearDialError<'a> {
    /// Follows the `source()` chain starting at `err` and returns the last error in it.
    ///
    /// If `err` reports no source, `err` itself is returned.
    fn most_inner_error(err: &(dyn std::error::Error)) -> &(dyn std::error::Error) {
        let mut innermost = err;
        loop {
            match innermost.source() {
                // There is a deeper cause: descend one level and keep looking.
                Some(cause) => innermost = cause,
                // End of the chain: this is the root cause.
                None => return innermost,
            }
        }
    }
}
/// Renders the wrapped `DialError` as a short, human-readable message.
///
/// For `DialError::Transport`, one message is written per failed address; multiple
/// messages are separated by `"; "`. For `TransportError::Other`, only the innermost
/// error of the `source()` chain is printed. All other variants map to a fixed sentence.
impl<'a> std::fmt::Display for ClearDialError<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        match &self.0 {
            DialError::Transport(errors) => {
                for (index, (_, transport_error)) in errors.iter().enumerate() {
                    // A dial can fail on several addresses; keep the individual
                    // messages from running into each other.
                    if index > 0 {
                        write!(f, "; ")?;
                    }
                    match transport_error {
                        libp2p::TransportError::MultiaddrNotSupported(multiaddr_error) => {
                            write!(f, "Multiaddr not supported: {multiaddr_error}")?;
                        }
                        libp2p::TransportError::Other(other_error) => {
                            // Only the root cause is logged, not the wrapping layers.
                            let inner_error = ClearDialError::most_inner_error(other_error);
                            write!(f, "Transport error: {inner_error}")?;
                        }
                    }
                }
                Ok(())
            }
            DialError::LocalPeerId { .. } => write!(f, "The peer being dialed is the local peer."),
            DialError::NoAddresses => write!(f, "No addresses for the peer to dial."),
            DialError::DialPeerConditionFalse(_) => write!(f, "PeerCondition evaluation failed."),
            DialError::Aborted => write!(f, "Connection aborted."),
            DialError::WrongPeerId { .. } => write!(f, "Wrong peer id."),
            DialError::Denied { cause } => write!(f, "Connection denied: {:?}", cause),
        }
    }
}
pub use crate::types::{
error, Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage, Subnet,
SubnetDiscovery,

View File

@ -12,9 +12,9 @@ use libp2p::swarm::{ConnectionId, NetworkBehaviour, PollParameters, ToSwarm};
use slog::{debug, error};
use types::EthSpec;
use crate::metrics;
use crate::rpc::GoodbyeReason;
use crate::types::SyncState;
use crate::{metrics, ClearDialError};
use super::peerdb::BanResult;
use super::{ConnectingType, PeerManager, PeerManagerEvent, ReportSource};
@ -132,7 +132,7 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
error,
connection_id: _,
}) => {
debug!(self.log, "Failed to dial peer"; "peer_id"=> ?peer_id, "error" => %error);
debug!(self.log, "Failed to dial peer"; "peer_id"=> ?peer_id, "error" => %ClearDialError(error));
self.on_dial_failure(peer_id);
}
FromSwarm::ExternalAddrConfirmed(_) => {