b0b606dabe
## Issue Addressed NA ## Proposed Changes I've noticed that our block hashing times increase significantly after the merge. I did some flamegraph-ing and noticed that we're allocating a `Vec` for each byte of each execution payload transaction. This seems like unnecessary work and a bit of a fragmentation risk. This PR switches to `SmallVec<[u8; 32]>` for the packed encoding of `TreeHash`. I believe this is a nice simple optimisation with no downside. ### Benchmarking These numbers were computed using #3580 on my desktop (i7 hex-core). You can see a bit of noise in the numbers, that's probably just my computer doing other things. Generally I found this change takes the time from 10-11ms to 8-9ms. I can also see all the allocations disappear from flamegraph. This is the block being benchmarked: https://beaconcha.in/slot/4704236 #### Before ``` [2022-09-15T21:44:19Z INFO lcli::block_root] Run 980: 10.553003ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 981: 10.563737ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 982: 10.646352ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 983: 10.628532ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 984: 10.552112ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 985: 10.587778ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 986: 10.640526ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 987: 10.587243ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 988: 10.554748ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 989: 10.551111ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 990: 11.559031ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 991: 11.944827ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 992: 10.554308ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 993: 11.043397ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 994: 11.043315ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 995: 11.207711ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 996: 11.056246ms [2022-09-15T21:44:19Z 
INFO lcli::block_root] Run 997: 11.049706ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 998: 11.432449ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 999: 11.149617ms ``` #### After ``` [2022-09-15T21:41:49Z INFO lcli::block_root] Run 980: 14.011653ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 981: 8.925314ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 982: 8.849563ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 983: 8.893689ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 984: 8.902964ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 985: 8.942067ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 986: 8.907088ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 987: 9.346101ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 988: 8.96142ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 989: 9.366437ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 990: 9.809334ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 991: 9.541561ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 992: 11.143518ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 993: 10.821181ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 994: 9.855973ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 995: 10.941006ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 996: 9.596155ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 997: 9.121739ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 998: 9.090019ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 999: 9.071885ms ``` ## Additional Info Please provide any additional information. For example, future considerations or information useful for reviewers.
178 lines
5.8 KiB
Rust
178 lines
5.8 KiB
Rust
/// Expands to the method bodies of a `TreeHash` implementation for a type whose serialized
/// form is `$byte_size` bytes long.
///
/// Only the methods are generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_tree_hash {
    ($byte_size: expr) => {
        // The type is always reported as a `Vector`.
        fn tree_hash_type() -> tree_hash::TreeHashType {
            tree_hash::TreeHashType::Vector
        }

        // Packing does not apply to a `Vector`; calling this panics.
        fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding {
            unreachable!("Vector should never be packed.")
        }

        // Packing does not apply to a `Vector`; calling this panics.
        fn tree_hash_packing_factor() -> usize {
            unreachable!("Vector should never be packed.")
        }

        fn tree_hash_root(&self) -> tree_hash::Hash256 {
            // The serialized bytes are merkleized directly. The tree hash implementation for
            // `FixedVec<u8, $byte_size>` could be used instead, but benchmarks have shown
            // that to be at least 15% slower because of the unnecessary copying and
            // allocation (one Vec per byte).
            let chunk_len = tree_hash::BYTES_PER_CHUNK;
            // Ceiling division: number of chunks needed to hold `$byte_size` bytes.
            let min_chunks = ($byte_size + chunk_len - 1) / chunk_len;
            let serialized = self.serialize();
            tree_hash::merkle_root(&serialized, min_chunks)
        }
    };
}
|
|
|
|
/// Expands to the method bodies of an `ssz::Encode` implementation for a fixed-length type
/// that occupies `$byte_size` bytes.
///
/// Only the methods are generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_ssz_encode {
    ($byte_size: expr) => {
        // The encoding always has a fixed length.
        fn is_ssz_fixed_len() -> bool {
            true
        }

        // The fixed length is the macro argument.
        fn ssz_fixed_len() -> usize {
            $byte_size
        }

        // Every instance reports the same length, again the macro argument.
        fn ssz_bytes_len(&self) -> usize {
            $byte_size
        }

        // Appends the output of `self.serialize()` to the end of `buf`.
        fn ssz_append(&self, buf: &mut Vec<u8>) {
            let encoded = self.serialize();
            buf.extend_from_slice(&encoded)
        }
    };
}
|
|
|
|
/// Expands to the method bodies of an `ssz::Decode` implementation for a fixed-length type
/// that occupies `$byte_size` bytes.
///
/// Only the methods are generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_ssz_decode {
    ($byte_size: expr) => {
        // The encoding always has a fixed length.
        fn is_ssz_fixed_len() -> bool {
            true
        }

        // The fixed length is the macro argument.
        fn ssz_fixed_len() -> usize {
            $byte_size
        }

        // Rejects input of the wrong length with `InvalidByteLength`; otherwise delegates to
        // `Self::deserialize` and reports its error, `Debug`-formatted, as `BytesInvalid`.
        fn from_ssz_bytes(bytes: &[u8]) -> Result<Self, ssz::DecodeError> {
            let len = bytes.len();
            let expected = <Self as ssz::Decode>::ssz_fixed_len();

            if len == expected {
                return Self::deserialize(bytes)
                    .map_err(|err| ssz::DecodeError::BytesInvalid(format!("{:?}", err)));
            }

            Err(ssz::DecodeError::InvalidByteLength { len, expected })
        }
    };
}
|
|
|
|
/// Expands to the `fmt` method of a `fmt::Display` implementation.
///
/// Only the method is generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_display {
    () => {
        // Writes the result of passing the serialized bytes through `hex_encode`.
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            let raw = self.serialize().to_vec();
            let encoded = hex_encode(raw);
            write!(f, "{}", encoded)
        }
    };
}
|
|
|
|
/// Expands to the associated type and method of a `FromStr` implementation.
///
/// Only the items are generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_from_str {
    () => {
        type Err = String;

        // Requires a leading "0x", hex-decodes the remainder and hands the bytes to
        // `Self::deserialize`. Every failure is reported as a `String`.
        fn from_str(s: &str) -> Result<Self, Self::Err> {
            match s.strip_prefix("0x") {
                None => Err("must start with 0x".to_string()),
                Some(hex_digits) => hex::decode(hex_digits)
                    .map_err(|e| e.to_string())
                    .and_then(|raw| {
                        Self::deserialize(&raw[..]).map_err(|e| format!("{:?}", e))
                    }),
            }
        }
    };
}
|
|
|
|
/// Expands to the `serialize` method of a `serde::Serialize` implementation.
///
/// Only the method is generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_serde_serialize {
    () => {
        // Serializes the value as a string, using its `to_string()` output.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let rendered = self.to_string();
            serializer.serialize_str(&rendered)
        }
    };
}
|
|
|
|
/// Expands to the `deserialize` method of a `serde::Deserialize` implementation.
///
/// Only the method is generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_serde_deserialize {
    () => {
        // Reads a string from the deserializer, then parses it with the type's `FromStr`
        // implementation; a parse failure is converted into a custom serde error.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // Visitor that accepts a `&str` and returns it as an owned `String`.
            struct StringVisitor;

            impl<'de> serde::de::Visitor<'de> for StringVisitor {
                type Value = String;

                fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                    formatter.write_str("a hex string with 0x prefix")
                }

                fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
                where
                    E: serde::de::Error,
                {
                    Ok(String::from(value))
                }
            }

            deserializer
                .deserialize_str(StringVisitor)
                .and_then(|text| {
                    <Self as std::str::FromStr>::from_str(&text)
                        .map_err(serde::de::Error::custom)
                })
        }
    };
}
|
|
|
|
/// Expands to the `fmt` method of a `Debug` implementation.
///
/// Only the method is generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
macro_rules! impl_debug {
    () => {
        // Writes the result of passing the serialized bytes through `hex_encode`.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let raw = self.serialize().to_vec();
            let encoded = hex_encode(&raw);
            write!(f, "{}", encoded)
        }
    };
}
|
|
|
|
/// Expands to the `arbitrary` method of an `Arbitrary` implementation for a type that is
/// deserialized from `$byte_size` bytes.
///
/// Only the method is generated. The surrounding `impl` header is left to the call site,
/// since writing it here gets very complicated when it comes to generics.
#[cfg(feature = "arbitrary")]
macro_rules! impl_arbitrary {
    ($byte_size: expr) => {
        // Fills a `$byte_size`-byte buffer from the unstructured input and attempts to
        // deserialize it; a deserialization failure is reported as `IncorrectFormat`.
        fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
            let mut raw = [0u8; $byte_size];
            u.fill_buffer(&mut raw)?;
            match Self::deserialize(&raw) {
                Ok(value) => Ok(value),
                Err(_) => Err(arbitrary::Error::IncorrectFormat),
            }
        }
    };
}
|