deneb related logging improvements (#4859)

1. Add commitments to logs and update the `Display` implementation of `KzgCommitment` to become truncated similarly to block root.

I've been finding it difficult to debug scenarios involving multiple blobs for the same `(index, block_root)`. Logging the commitment will help with this, we can match it to what exists in the block. 

Example output:

```
Oct 20 21:13:36.700 DEBG Successfully verified gossip blob       commitment: 0xa3c1…1cd8, index: 0, root: 0xf31e…f9de, slot: 154568
Oct 20 21:13:36.785 DEBG Successfully verified gossip block      commitments: [0xa3c1…1cd8, 0x8655…02ff, 0x8d6a…955a, 0x84ac…3a1b, 0x9752…629b, 0xb9fc…20fb], root: 0xf31eeb732702e429e89057b15e1c0c631e8452e09e03cb1924353f536ef4f9de, slot: 154568, graffiti: teku/besu, service: beacon
```

Example output in a block with no blobs (this will show up pre-deneb):
```
426734:Oct 20 21:15:24.113 DEBG Successfully verified gossip block, commitments: [], root: 0x619db1360ba0e8d44ae2a0f2450ebca47e167191feecffcfac0e8d7b6c39623c, slot: 154577, graffiti: teku/nethermind, service: beacon, module: beacon_chain::beacon_chain:2765
```

2. Remove `strum::IntoStaticStr` from `AvailabilityCheckError`. This is because `IntoStaticStr` end up dropping information inside the enum. So kzg commitments in this error are dropped, making it more difficult to debug
```
AvailabilityCheckError::KzgCommitmentMismatch {
        blob_commitment: KzgCommitment,
        block_commitment: KzgCommitment,
    },
```
which is output as just `AvailabilityCheckError`

3. Some additional misc sync logs I found useful in debugging https://github.com/sigp/lighthouse/pull/4869

4. This downgrades ”Block returned for single block lookup not present” to debug because I don’t think we can fix the scenario that causes this unless we can cancel inflight rpc requests

Co-authored-by: realbigsean <seananderson33@gmail.com>
This commit is contained in:
realbigsean 2023-10-25 15:30:17 +00:00
parent d4f26ee123
commit ba891e1fed
14 changed files with 181 additions and 31 deletions

View File

@ -2761,12 +2761,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
match GossipVerifiedBlock::new(block, &chain) {
Ok(verified) => {
let commitments_formatted = verified.block.commitments_formatted();
debug!(
chain.log,
"Successfully verified gossip block";
"graffiti" => graffiti_string,
"slot" => slot,
"root" => ?verified.block_root(),
"commitments" => commitments_formatted,
);
Ok(verified)

View File

@ -38,6 +38,13 @@ impl<E: EthSpec> RpcBlock<E> {
pub fn block_root(&self) -> Hash256 {
self.block_root
}
pub fn as_block(&self) -> &SignedBeaconBlock<E> {
match &self.block {
RpcBlockInner::Block(block) => block,
RpcBlockInner::BlockAndBlobs(block, _) => block,
}
}
}
/// Note: This variant is intentionally private because we want to safely construct the

View File

@ -1,8 +1,7 @@
use kzg::{Error as KzgError, KzgCommitment};
use strum::IntoStaticStr;
use types::{BeaconStateError, Hash256};
#[derive(Debug, IntoStaticStr)]
#[derive(Debug)]
pub enum Error {
Kzg(KzgError),
KzgNotInitialized,

View File

@ -613,6 +613,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let slot = signed_blob.message.slot;
let root = signed_blob.message.block_root;
let index = signed_blob.message.index;
let commitment = signed_blob.message.kzg_commitment;
let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(&metrics::BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME, delay);
@ -633,6 +634,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"proposer_index" => gossip_verified_blob.proposer_index(),
"slot" => gossip_verified_blob.slot(),
"delay" => ?delay,
"commitment" => %gossip_verified_blob.kzg_commitment(),
);
}
@ -641,7 +643,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"Successfully verified gossip blob";
"slot" => %slot,
"root" => %root,
"index" => %index
"index" => %index,
"commitment" => %gossip_verified_blob.kzg_commitment(),
);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
@ -668,7 +671,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"Unknown parent hash for blob";
"action" => "requesting parent",
"block_root" => %blob.block_root,
"parent_root" => %blob.block_parent_root
"parent_root" => %blob.block_parent_root,
"commitment" => %commitment,
);
self.send_sync_message(SyncMessage::UnknownParentBlob(peer_id, blob));
}
@ -683,7 +687,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"error" => ?err,
"slot" => %slot,
"root" => %root,
"index" => %index
"index" => %index,
"commitment" => %commitment,
);
// Prevent recurring behaviour by penalizing the peer slightly.
self.gossip_penalize_peer(
@ -707,7 +712,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"error" => ?err,
"slot" => %slot,
"root" => %root,
"index" => %index
"index" => %index,
"commitment" => %commitment,
);
// Prevent recurring behaviour by penalizing the peer slightly.
self.gossip_penalize_peer(
@ -743,13 +749,13 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
.should_delay_lookup(blob_slot);
match self.chain.process_gossip_blob(verified_blob).await {
Ok(AvailabilityProcessingStatus::Imported(hash)) => {
Ok(AvailabilityProcessingStatus::Imported(block_root)) => {
// Note: Reusing block imported metric here
metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_IMPORTED_TOTAL);
info!(
self.log,
"Gossipsub blob processed, imported fully available block";
"hash" => %hash
"block_root" => %block_root
);
self.chain.recompute_head_at_current_slot().await;
}

View File

@ -224,7 +224,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
request_id: PeerRequestId,
request: BlobsByRootRequest,
) {
let requested_blobs = request.blob_ids.len();
let Some(requested_root) = request.blob_ids.get(0).map(|id| id.block_root) else {
// No blob ids requested.
return;
};
let requested_indices = request
.blob_ids
.iter()
.map(|id| id.index)
.collect::<Vec<_>>();
let mut send_blob_count = 0;
let send_response = true;
@ -277,7 +285,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
self.log,
"Received BlobsByRoot Request";
"peer" => %peer_id,
"requested" => requested_blobs,
"request_root" => %requested_root,
"request_indices" => ?requested_indices,
"returned" => send_blob_count
);

View File

@ -24,7 +24,9 @@ use slot_clock::SlotClock;
use std::sync::Arc;
use std::time::Duration;
use std::time::{SystemTime, UNIX_EPOCH};
use store::KzgCommitment;
use tokio::sync::mpsc;
use types::beacon_block_body::format_kzg_commitments;
use types::blob_sidecar::FixedBlobSidecarList;
use types::{Epoch, Hash256, Slot};
@ -212,6 +214,16 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let slot = block.slot();
let parent_root = block.message().parent_root();
let commitments_formatted = block.as_block().commitments_formatted();
debug!(
self.log,
"Processing RPC block";
"block_root" => ?block_root,
"proposer" => block.message().proposer_index(),
"slot" => block.slot(),
"commitments" => commitments_formatted,
);
let result = self
.chain
@ -288,10 +300,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
return;
};
let indices: Vec<_> = blobs
let (indices, commitments): (Vec<u64>, Vec<KzgCommitment>) = blobs
.iter()
.filter_map(|blob_opt| blob_opt.as_ref().map(|blob| blob.index))
.collect();
.filter_map(|blob_opt| {
blob_opt
.as_ref()
.map(|blob| (blob.index, blob.kzg_commitment))
})
.unzip();
let commitments = format_kzg_commitments(&commitments);
debug!(
self.log,
@ -299,6 +316,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"indices" => ?indices,
"block_root" => %block_root,
"slot" => %slot,
"commitments" => commitments,
);
if let Ok(current_slot) = self.chain.slot() {
@ -332,6 +350,14 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"slot" => %slot,
);
}
Err(BlockError::BlockIsAlreadyKnown) => {
debug!(
self.log,
"Blobs have already been imported";
"block_hash" => %block_root,
"slot" => %slot,
);
}
Err(e) => {
warn!(
self.log,

View File

@ -330,7 +330,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
let Some(lookup) = self.get_single_lookup::<R>(lookup_id) else {
if response.is_some() {
warn!(
// We don't have the ability to cancel in-flight RPC requests. So this can happen
// if we started this RPC request, and later saw the block/blobs via gossip.
debug!(
self.log,
"Block returned for single block lookup not present";
"response_type" => ?response_type,

View File

@ -30,7 +30,7 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
}
}
pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, &'static str> {
pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, String> {
let BlocksAndBlobsRequestInfo {
accumulated_blocks,
accumulated_sidecars,
@ -50,28 +50,28 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
.unwrap_or(false);
pair_next_blob
} {
blob_list.push(blob_iter.next().ok_or("Missing next blob")?);
blob_list.push(blob_iter.next().ok_or("Missing next blob".to_string())?);
}
let mut blobs_buffer = vec![None; T::max_blobs_per_block()];
for blob in blob_list {
let blob_index = blob.index as usize;
let Some(blob_opt) = blobs_buffer.get_mut(blob_index) else {
return Err("Invalid blob index");
return Err("Invalid blob index".to_string());
};
if blob_opt.is_some() {
return Err("Repeat blob index");
return Err("Repeat blob index".to_string());
} else {
*blob_opt = Some(blob);
}
}
let blobs = VariableList::from(blobs_buffer.into_iter().flatten().collect::<Vec<_>>());
responses.push(RpcBlock::new(None, block, Some(blobs))?)
responses.push(RpcBlock::new(None, block, Some(blobs)).map_err(|e| format!("{e:?}"))?)
}
// if accumulated sidecars is not empty, throw an error.
if blob_iter.next().is_some() {
return Err("Received sidecars that don't pair well");
return Err("Received sidecars that don't pair well".to_string());
}
Ok(responses)

View File

@ -1034,8 +1034,11 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// inform range that the request needs to be treated as failed
// With time we will want to downgrade this log
warn!(
self.log, "Blocks and blobs request for range received invalid data";
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
self.log,
"Blocks and blobs request for range received invalid data";
"peer_id" => %peer_id,
"batch_id" => resp.batch_id,
"error" => e.clone()
);
let id = RequestId::RangeBlockAndBlobs { id };
self.network.report_peer(
@ -1043,7 +1046,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
PeerAction::MidToleranceError,
"block_blob_faulty_batch",
);
self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
self.inject_error(peer_id, id, RPCError::InvalidData(e))
}
}
}
@ -1091,7 +1094,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// With time we will want to downgrade this log
warn!(
self.log, "Blocks and blobs request for backfill received invalid data";
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e.clone()
);
let id = RequestId::BackFillBlockAndBlobs { id };
self.network.report_peer(
@ -1099,7 +1102,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
PeerAction::MidToleranceError,
"block_blob_faulty_backfill_batch",
);
self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
self.inject_error(peer_id, id, RPCError::InvalidData(e))
}
}
}

View File

@ -23,7 +23,7 @@ use types::{BlobSidecar, EthSpec, SignedBeaconBlock};
pub struct BlocksAndBlobsByRangeResponse<T: EthSpec> {
pub batch_id: BatchId,
pub responses: Result<Vec<RpcBlock<T>>, &'static str>,
pub responses: Result<Vec<RpcBlock<T>>, String>,
}
pub struct BlocksAndBlobsByRangeRequest<T: EthSpec> {
@ -338,12 +338,26 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
request_id: Id,
batch_type: ByRangeRequestType,
) -> Option<(ChainId, BatchId)> {
match batch_type {
let req = match batch_type {
ByRangeRequestType::BlocksAndBlobs => self
.range_blocks_and_blobs_requests
.remove(&request_id)
.map(|req| (req.chain_id, req.batch_id)),
ByRangeRequestType::Blocks => self.range_requests.remove(&request_id),
};
if let Some(req) = req {
debug!(
self.log,
"Range sync request failed";
"request_id" => request_id,
"batch_type" => ?batch_type,
"chain_id" => ?req.0,
"batch_id" => ?req.1
);
Some(req)
} else {
debug!(self.log, "Range sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
None
}
}
@ -352,12 +366,25 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
request_id: Id,
batch_type: ByRangeRequestType,
) -> Option<BatchId> {
match batch_type {
let batch_id = match batch_type {
ByRangeRequestType::BlocksAndBlobs => self
.backfill_blocks_and_blobs_requests
.remove(&request_id)
.map(|(batch_id, _info)| batch_id),
ByRangeRequestType::Blocks => self.backfill_requests.remove(&request_id),
};
if let Some(batch_id) = batch_id {
debug!(
self.log,
"Backfill sync request failed";
"request_id" => request_id,
"batch_type" => ?batch_type,
"batch_id" => ?batch_id
);
Some(batch_id)
} else {
debug!(self.log, "Backfill sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
None
}
}

View File

@ -827,9 +827,24 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// sending an error /timeout) if the peer is removed from the chain for other
// reasons. Check that this block belongs to the expected peer
if !batch.is_expecting_block(peer_id, &request_id) {
debug!(
self.log,
"Batch not expecting block";
"batch_epoch" => batch_id,
"batch_state" => ?batch.state(),
"peer_id" => %peer_id,
"request_id" => %request_id
);
return Ok(KeepChain);
}
debug!(self.log, "Batch failed. RPC Error"; "batch_epoch" => batch_id);
debug!(
self.log,
"Batch failed. RPC Error";
"batch_epoch" => batch_id,
"batch_state" => ?batch.state(),
"peer_id" => %peer_id,
"request_id" => %request_id
);
if let Some(active_requests) = self.peers.get_mut(peer_id) {
active_requests.remove(&batch_id);
}
@ -841,6 +856,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
}
self.retry_batch_download(network, batch_id)
} else {
debug!(
self.log,
"Batch not found";
"batch_epoch" => batch_id,
"peer_id" => %peer_id,
"request_id" => %request_id
);
// this could be an error for an old batch, removed when the chain advances
Ok(KeepChain)
}

View File

@ -508,6 +508,14 @@ impl<E: EthSpec> From<BeaconBlockBody<E, FullPayload<E>>>
}
}
/// Util method helpful for logging.
pub fn format_kzg_commitments(commitments: &[KzgCommitment]) -> String {
let commitment_strings: Vec<String> = commitments.iter().map(|x| x.to_string()).collect();
let commitments_joined = commitment_strings.join(", ");
let surrounded_commitments = format!("[{}]", commitments_joined);
surrounded_commitments
}
#[cfg(test)]
mod tests {
mod base {

View File

@ -1,3 +1,4 @@
use crate::beacon_block_body::format_kzg_commitments;
use crate::*;
use bls::Signature;
use derivative::Derivative;
@ -256,6 +257,15 @@ impl<E: EthSpec, Payload: AbstractExecPayload<E>> SignedBeaconBlock<E, Payload>
.map(|c| c.len())
.unwrap_or(0)
}
/// Used for displaying commitments in logs.
pub fn commitments_formatted(&self) -> String {
let Ok(commitments) = self.message().body().blob_kzg_commitments() else {
return "[]".to_string();
};
format_kzg_commitments(commitments.as_ref())
}
}
// We can convert pre-Bellatrix blocks without payloads into blocks with payloads.

View File

@ -36,7 +36,15 @@ impl From<KzgCommitment> for c_kzg::Bytes48 {
impl Display for KzgCommitment {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}", serde_utils::hex::encode(self.0))
write!(f, "0x")?;
for i in &self.0[0..2] {
write!(f, "{:02x}", i)?;
}
write!(f, "")?;
for i in &self.0[BYTES_PER_COMMITMENT - 2..BYTES_PER_COMMITMENT] {
write!(f, "{:02x}", i)?;
}
Ok(())
}
}
@ -63,7 +71,7 @@ impl Serialize for KzgCommitment {
where
S: Serializer,
{
serializer.serialize_str(&self.to_string())
serializer.serialize_str(&format!("{:?}", self))
}
}
@ -113,3 +121,24 @@ impl arbitrary::Arbitrary<'_> for KzgCommitment {
Ok(KzgCommitment(bytes))
}
}
#[test]
fn kzg_commitment_display() {
let display_commitment_str = "0x53fa…adac";
let display_commitment = KzgCommitment::from_str(
"0x53fa09af35d1d1a9e76f65e16112a9064ce30d1e4e2df98583f0f5dc2e7dd13a4f421a9c89f518fafd952df76f23adac",
)
.unwrap()
.to_string();
assert_eq!(display_commitment, display_commitment_str);
}
#[test]
fn kzg_commitment_debug() {
let debug_commitment_str =
"0x53fa09af35d1d1a9e76f65e16112a9064ce30d1e4e2df98583f0f5dc2e7dd13a4f421a9c89f518fafd952df76f23adac";
let debug_commitment = KzgCommitment::from_str(debug_commitment_str).unwrap();
assert_eq!(format!("{debug_commitment:?}"), debug_commitment_str);
}