From b0b606dabe5c1775c6cb1436bf459cd5c888e97c Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Fri, 16 Sep 2022 08:54:06 +0000 Subject: [PATCH] Use `SmallVec` for `TreeHash` packed encoding (#3581) ## Issue Addressed NA ## Proposed Changes I've noticed that our block hashing times increase significantly after the merge. I did some flamegraph-ing and noticed that we're allocating a `Vec` for each byte of each execution payload transaction. This seems like unnecessary work and a bit of a fragmentation risk. This PR switches to `SmallVec<[u8; 32]>` for the packed encoding of `TreeHash`. I believe this is a nice simple optimisation with no downside. ### Benchmarking These numbers were computed using #3580 on my desktop (i7 hex-core). You can see a bit of noise in the numbers, that's probably just my computer doing other things. Generally I found this change takes the time from 10-11ms to 8-9ms. I can also see all the allocations disappear from flamegraph. This is the block being benchmarked: https://beaconcha.in/slot/4704236 #### Before ``` [2022-09-15T21:44:19Z INFO lcli::block_root] Run 980: 10.553003ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 981: 10.563737ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 982: 10.646352ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 983: 10.628532ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 984: 10.552112ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 985: 10.587778ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 986: 10.640526ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 987: 10.587243ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 988: 10.554748ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 989: 10.551111ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 990: 11.559031ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 991: 11.944827ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 992: 10.554308ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 993: 11.043397ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 994: 11.043315ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 995: 11.207711ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 996: 11.056246ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 997: 11.049706ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 998: 11.432449ms [2022-09-15T21:44:19Z INFO lcli::block_root] Run 999: 11.149617ms ``` #### After ``` [2022-09-15T21:41:49Z INFO lcli::block_root] Run 980: 14.011653ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 981: 8.925314ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 982: 8.849563ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 983: 8.893689ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 984: 8.902964ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 985: 8.942067ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 986: 8.907088ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 987: 9.346101ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 988: 8.96142ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 989: 9.366437ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 990: 9.809334ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 991: 9.541561ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 992: 11.143518ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 993: 10.821181ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 994: 9.855973ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 995: 10.941006ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 996: 9.596155ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 997: 9.121739ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 998: 9.090019ms [2022-09-15T21:41:49Z INFO lcli::block_root] Run 999: 9.071885ms ``` ## Additional Info Please provide any additional information. For example, future considerations or information useful for reviewers. --- consensus/ssz_types/src/bitfield.rs | 4 +-- consensus/ssz_types/src/fixed_vector.rs | 2 +- consensus/ssz_types/src/variable_list.rs | 2 +- consensus/tree_hash/src/impls.rs | 30 ++++++++++----------- consensus/tree_hash/src/lib.rs | 11 +++++--- consensus/tree_hash/tests/tests.rs | 4 +-- consensus/tree_hash_derive/src/lib.rs | 6 ++--- consensus/types/src/execution_block_hash.rs | 2 +- consensus/types/src/graffiti.rs | 4 +-- consensus/types/src/participation_flags.rs | 4 +-- consensus/types/src/payload.rs | 6 ++--- consensus/types/src/slot_epoch.rs | 2 +- consensus/types/src/slot_epoch_macros.rs | 4 +-- crypto/bls/src/macros.rs | 2 +- testing/ef_tests/src/cases/common.rs | 2 +- 15 files changed, 44 insertions(+), 41 deletions(-) diff --git a/consensus/ssz_types/src/bitfield.rs b/consensus/ssz_types/src/bitfield.rs index 599170fa2..b0cf4551e 100644 --- a/consensus/ssz_types/src/bitfield.rs +++ b/consensus/ssz_types/src/bitfield.rs @@ -620,7 +620,7 @@ impl tree_hash::TreeHash for Bitfield> { tree_hash::TreeHashType::List } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("List should never be packed.") } @@ -641,7 +641,7 @@ impl tree_hash::TreeHash for Bitfield> { tree_hash::TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("Vector should never be packed.") } diff --git a/consensus/ssz_types/src/fixed_vector.rs b/consensus/ssz_types/src/fixed_vector.rs index 5f7a4af96..e64e76ef4 100644 --- a/consensus/ssz_types/src/fixed_vector.rs +++ b/consensus/ssz_types/src/fixed_vector.rs @@ -167,7 +167,7 @@ where tree_hash::TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("Vector should never be packed.") } diff --git a/consensus/ssz_types/src/variable_list.rs b/consensus/ssz_types/src/variable_list.rs index 49f8004b2..f23872c87 100644 --- a/consensus/ssz_types/src/variable_list.rs +++ b/consensus/ssz_types/src/variable_list.rs @@ -184,7 +184,7 @@ where tree_hash::TreeHashType::List } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("List should never be packed.") } diff --git a/consensus/tree_hash/src/impls.rs b/consensus/tree_hash/src/impls.rs index 00fed489c..cf05d2a3d 100644 --- a/consensus/tree_hash/src/impls.rs +++ b/consensus/tree_hash/src/impls.rs @@ -14,8 +14,8 @@ macro_rules! impl_for_bitsize { TreeHashType::Basic } - fn tree_hash_packed_encoding(&self) -> Vec { - self.to_le_bytes().to_vec() + fn tree_hash_packed_encoding(&self) -> PackedEncoding { + PackedEncoding::from_slice(&self.to_le_bytes()) } fn tree_hash_packing_factor() -> usize { @@ -41,7 +41,7 @@ impl TreeHash for bool { TreeHashType::Basic } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { (*self as u8).tree_hash_packed_encoding() } @@ -62,7 +62,7 @@ macro_rules! impl_for_lt_32byte_u8_array { TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { unreachable!("bytesN should never be packed.") } @@ -87,10 +87,10 @@ impl TreeHash for U128 { TreeHashType::Basic } - fn tree_hash_packed_encoding(&self) -> Vec { - let mut result = vec![0; 16]; + fn tree_hash_packed_encoding(&self) -> PackedEncoding { + let mut result = [0; 16]; self.to_little_endian(&mut result); - result + PackedEncoding::from_slice(&result) } fn tree_hash_packing_factor() -> usize { @@ -109,10 +109,10 @@ impl TreeHash for U256 { TreeHashType::Basic } - fn tree_hash_packed_encoding(&self) -> Vec { - let mut result = vec![0; 32]; + fn tree_hash_packed_encoding(&self) -> PackedEncoding { + let mut result = [0; 32]; self.to_little_endian(&mut result); - result + PackedEncoding::from_slice(&result) } fn tree_hash_packing_factor() -> usize { @@ -131,10 +131,10 @@ impl TreeHash for H160 { TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { - let mut result = vec![0; 32]; + fn tree_hash_packed_encoding(&self) -> PackedEncoding { + let mut result = [0; 32]; result[0..20].copy_from_slice(self.as_bytes()); - result + PackedEncoding::from_slice(&result) } fn tree_hash_packing_factor() -> usize { @@ -153,8 +153,8 @@ impl TreeHash for H256 { TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { - self.as_bytes().to_vec() + fn tree_hash_packed_encoding(&self) -> PackedEncoding { + PackedEncoding::from_slice(self.as_bytes()) } fn tree_hash_packing_factor() -> usize { diff --git a/consensus/tree_hash/src/lib.rs b/consensus/tree_hash/src/lib.rs index 997ae867f..ec40de916 100644 --- a/consensus/tree_hash/src/lib.rs +++ b/consensus/tree_hash/src/lib.rs @@ -8,13 +8,16 @@ pub use merkleize_padded::merkleize_padded; pub use merkleize_standard::merkleize_standard; use eth2_hashing::{hash_fixed, ZERO_HASHES, ZERO_HASHES_MAX_INDEX}; +use smallvec::SmallVec; pub const BYTES_PER_CHUNK: usize = 32; pub const HASHSIZE: usize = 32; pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK; pub const MAX_UNION_SELECTOR: u8 = 127; +pub const SMALLVEC_SIZE: usize = 32; pub type Hash256 = ethereum_types::H256; +pub type PackedEncoding = SmallVec<[u8; SMALLVEC_SIZE]>; /// Convenience method for `MerkleHasher` which also provides some fast-paths for small trees. /// @@ -109,7 +112,7 @@ pub enum TreeHashType { pub trait TreeHash { fn tree_hash_type() -> TreeHashType; - fn tree_hash_packed_encoding(&self) -> Vec; + fn tree_hash_packed_encoding(&self) -> PackedEncoding; fn tree_hash_packing_factor() -> usize; @@ -125,7 +128,7 @@ where T::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { T::tree_hash_packed_encoding(*self) } @@ -146,7 +149,7 @@ macro_rules! tree_hash_ssz_encoding_as_vector { tree_hash::TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { unreachable!("Vector should never be packed.") } @@ -169,7 +172,7 @@ macro_rules! tree_hash_ssz_encoding_as_list { tree_hash::TreeHashType::List } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { unreachable!("List should never be packed.") } diff --git a/consensus/tree_hash/tests/tests.rs b/consensus/tree_hash/tests/tests.rs index b7f7178d0..8b2a4b21b 100644 --- a/consensus/tree_hash/tests/tests.rs +++ b/consensus/tree_hash/tests/tests.rs @@ -1,5 +1,5 @@ use ssz_derive::Encode; -use tree_hash::{Hash256, MerkleHasher, TreeHash, BYTES_PER_CHUNK}; +use tree_hash::{Hash256, MerkleHasher, PackedEncoding, TreeHash, BYTES_PER_CHUNK}; use tree_hash_derive::TreeHash; #[derive(Encode)] @@ -18,7 +18,7 @@ impl tree_hash::TreeHash for HashVec { tree_hash::TreeHashType::List } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { unreachable!("List should never be packed.") } diff --git a/consensus/tree_hash_derive/src/lib.rs b/consensus/tree_hash_derive/src/lib.rs index f65be1b6b..21ff324d5 100644 --- a/consensus/tree_hash_derive/src/lib.rs +++ b/consensus/tree_hash_derive/src/lib.rs @@ -150,7 +150,7 @@ fn tree_hash_derive_struct(item: &DeriveInput, struct_data: &DataStruct) -> Toke tree_hash::TreeHashType::Container } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("Struct should never be packed.") } @@ -231,7 +231,7 @@ fn tree_hash_derive_enum_transparent( tree_hash::TreeHashType::Container } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("Enum should never be packed") } @@ -288,7 +288,7 @@ fn tree_hash_derive_enum_union(derive_input: &DeriveInput, enum_data: &DataEnum) tree_hash::TreeHashType::Container } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("Enum should never be packed") } diff --git a/consensus/types/src/execution_block_hash.rs b/consensus/types/src/execution_block_hash.rs index 978bd4c69..988dcece5 100644 --- a/consensus/types/src/execution_block_hash.rs +++ b/consensus/types/src/execution_block_hash.rs @@ -67,7 +67,7 @@ impl tree_hash::TreeHash for ExecutionBlockHash { Hash256::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { self.0.tree_hash_packed_encoding() } diff --git a/consensus/types/src/graffiti.rs b/consensus/types/src/graffiti.rs index f5f74b601..73beb8264 100644 --- a/consensus/types/src/graffiti.rs +++ b/consensus/types/src/graffiti.rs @@ -7,7 +7,7 @@ use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; use ssz::{Decode, DecodeError, Encode}; use std::fmt; use std::str::FromStr; -use tree_hash::TreeHash; +use tree_hash::{PackedEncoding, TreeHash}; pub const GRAFFITI_BYTES_LEN: usize = 32; @@ -159,7 +159,7 @@ impl TreeHash for Graffiti { <[u8; GRAFFITI_BYTES_LEN]>::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { self.0.tree_hash_packed_encoding() } diff --git a/consensus/types/src/participation_flags.rs b/consensus/types/src/participation_flags.rs index 74c2cf73b..a2dd49486 100644 --- a/consensus/types/src/participation_flags.rs +++ b/consensus/types/src/participation_flags.rs @@ -3,7 +3,7 @@ use safe_arith::{ArithError, SafeArith}; use serde_derive::{Deserialize, Serialize}; use ssz::{Decode, DecodeError, Encode}; use test_random_derive::TestRandom; -use tree_hash::{TreeHash, TreeHashType}; +use tree_hash::{PackedEncoding, TreeHash, TreeHashType}; #[derive(Debug, Default, Clone, Copy, PartialEq, Deserialize, Serialize, TestRandom)] #[serde(transparent)] @@ -78,7 +78,7 @@ impl TreeHash for ParticipationFlags { u8::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { self.bits.tree_hash_packed_encoding() } diff --git a/consensus/types/src/payload.rs b/consensus/types/src/payload.rs index 114ca02ec..667fff58c 100644 --- a/consensus/types/src/payload.rs +++ b/consensus/types/src/payload.rs @@ -7,7 +7,7 @@ use std::convert::TryFrom; use std::fmt::Debug; use std::hash::Hash; use test_random_derive::TestRandom; -use tree_hash::TreeHash; +use tree_hash::{PackedEncoding, TreeHash}; #[derive(Debug)] pub enum BlockType { @@ -175,7 +175,7 @@ impl TreeHash for BlindedPayload { >::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> PackedEncoding { self.execution_payload_header.tree_hash_packed_encoding() } @@ -244,7 +244,7 @@ impl TreeHash for FullPayload { >::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { self.execution_payload.tree_hash_packed_encoding() } diff --git a/consensus/types/src/slot_epoch.rs b/consensus/types/src/slot_epoch.rs index e68055d34..277aa9dea 100644 --- a/consensus/types/src/slot_epoch.rs +++ b/consensus/types/src/slot_epoch.rs @@ -16,7 +16,7 @@ use crate::{ChainSpec, SignedRoot}; use rand::RngCore; use safe_arith::{ArithError, SafeArith}; use serde_derive::{Deserialize, Serialize}; -use ssz::{ssz_encode, Decode, DecodeError, Encode}; +use ssz::{Decode, DecodeError, Encode}; use std::fmt; use std::hash::Hash; use std::iter::Iterator; diff --git a/consensus/types/src/slot_epoch_macros.rs b/consensus/types/src/slot_epoch_macros.rs index b8e6202fe..fafc455ef 100644 --- a/consensus/types/src/slot_epoch_macros.rs +++ b/consensus/types/src/slot_epoch_macros.rs @@ -290,8 +290,8 @@ macro_rules! impl_ssz { tree_hash::TreeHashType::Basic } - fn tree_hash_packed_encoding(&self) -> Vec { - ssz_encode(self) + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { + self.0.tree_hash_packed_encoding() } fn tree_hash_packing_factor() -> usize { diff --git a/crypto/bls/src/macros.rs b/crypto/bls/src/macros.rs index a5fce70a9..f3a7374ba 100644 --- a/crypto/bls/src/macros.rs +++ b/crypto/bls/src/macros.rs @@ -7,7 +7,7 @@ macro_rules! impl_tree_hash { tree_hash::TreeHashType::Vector } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { unreachable!("Vector should never be packed.") } diff --git a/testing/ef_tests/src/cases/common.rs b/testing/ef_tests/src/cases/common.rs index ade8711cd..e77e56193 100644 --- a/testing/ef_tests/src/cases/common.rs +++ b/testing/ef_tests/src/cases/common.rs @@ -43,7 +43,7 @@ macro_rules! uint_wrapper { <$wrapped_type>::tree_hash_type() } - fn tree_hash_packed_encoding(&self) -> Vec { + fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding { self.x.tree_hash_packed_encoding() }