fix deneb sync bug (#4869)
## Issue Addressed I observed our forward sync on devnet 9 would stall when we would hit this log: ``` 250425:Oct 19 00:54:17.133 WARN Blocks and blobs request for range received invalid data, error: KzgCommitmentMismatch, batch_id: 4338, peer_id: 16Uiu2HAmHbmkEQFDrJfNuy1aYyAfHkNUwSD9FN7EVAqGJ8YTF9Mh, service: sync, module: network::sync::manager:1036 ``` ## Proposed Changes `range_sync_block_and_blob_response` [here](1cb02a13a5/beacon_node/network/src/sync/manager.rs (L1013)
) removes the request from the sync manager. later, however if there's an error, `inject_error` [here](1cb02a13a5/beacon_node/network/src/sync/manager.rs (L1055)
) expects the request to exist so we can handle retry logic. So this PR just re-inserts the request (withthout any accumulated blobs or blocks) when we hit an error here. The issue is unique to block+blob sync because the error here is only possible from mismatches between blocks + blobs after we've downloaded both, there's no equivalent error in block sync Co-authored-by: realbigsean <seananderson33@gmail.com>
This commit is contained in:
parent
4ce01ddd11
commit
42da392edc
@ -43,6 +43,7 @@ use crate::service::NetworkMessage;
|
||||
use crate::status::ToStatusMessage;
|
||||
use crate::sync::block_lookups::common::{Current, Parent};
|
||||
use crate::sync::block_lookups::{BlobRequestState, BlockRequestState};
|
||||
use crate::sync::network_context::BlocksAndBlobsByRangeRequest;
|
||||
use crate::sync::range_sync::ByRangeRequestType;
|
||||
use beacon_chain::block_verification_types::AsBlock;
|
||||
use beacon_chain::block_verification_types::RpcBlock;
|
||||
@ -1031,6 +1032,14 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// Re-insert the request so we can retry
|
||||
let new_req = BlocksAndBlobsByRangeRequest {
|
||||
chain_id,
|
||||
batch_id: resp.batch_id,
|
||||
block_blob_info: <_>::default(),
|
||||
};
|
||||
self.network
|
||||
.insert_range_blocks_and_blobs_request(id, new_req);
|
||||
// inform range that the request needs to be treated as failed
|
||||
// With time we will want to downgrade this log
|
||||
warn!(
|
||||
@ -1090,6 +1099,13 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// Re-insert the request so we can retry
|
||||
self.network.insert_backfill_blocks_and_blobs_requests(
|
||||
id,
|
||||
resp.batch_id,
|
||||
<_>::default(),
|
||||
);
|
||||
|
||||
// inform backfill that the request needs to be treated as failed
|
||||
// With time we will want to downgrade this log
|
||||
warn!(
|
||||
|
@ -591,4 +591,22 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
ByRangeRequestType::Blocks
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert_range_blocks_and_blobs_request(
|
||||
&mut self,
|
||||
id: Id,
|
||||
request: BlocksAndBlobsByRangeRequest<T::EthSpec>,
|
||||
) {
|
||||
self.range_blocks_and_blobs_requests.insert(id, request);
|
||||
}
|
||||
|
||||
pub fn insert_backfill_blocks_and_blobs_requests(
|
||||
&mut self,
|
||||
id: Id,
|
||||
batch_id: BatchId,
|
||||
request: BlocksAndBlobsRequestInfo<T::EthSpec>,
|
||||
) {
|
||||
self.backfill_blocks_and_blobs_requests
|
||||
.insert(id, (batch_id, request));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user