lighthouse/crypto/bls/src/macros.rs
Paul Hauner b0b606dabe Use SmallVec for TreeHash packed encoding (#3581)
## Issue Addressed

NA

## Proposed Changes

I've noticed that our block hashing times increase significantly after the merge. I did some flamegraph-ing and noticed that we're allocating a `Vec` for each byte of each execution payload transaction. This seems like unnecessary work and a bit of a fragmentation risk.

This PR switches to `SmallVec<[u8; 32]>` for the packed encoding of `TreeHash`. I believe this is a nice simple optimisation with no downside.

### Benchmarking

These numbers were computed using #3580 on my desktop (i7 hex-core). You can see a bit of noise in the numbers, that's probably just my computer doing other things. Generally I found this change takes the time from 10-11ms to 8-9ms. I can also see all the allocations disappear from flamegraph.

This is the block being benchmarked: https://beaconcha.in/slot/4704236

#### Before

```
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 980: 10.553003ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 981: 10.563737ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 982: 10.646352ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 983: 10.628532ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 984: 10.552112ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 985: 10.587778ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 986: 10.640526ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 987: 10.587243ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 988: 10.554748ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 989: 10.551111ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 990: 11.559031ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 991: 11.944827ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 992: 10.554308ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 993: 11.043397ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 994: 11.043315ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 995: 11.207711ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 996: 11.056246ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 997: 11.049706ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 998: 11.432449ms
[2022-09-15T21:44:19Z INFO  lcli::block_root] Run 999: 11.149617ms
```

#### After

```
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 980: 14.011653ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 981: 8.925314ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 982: 8.849563ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 983: 8.893689ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 984: 8.902964ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 985: 8.942067ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 986: 8.907088ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 987: 9.346101ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 988: 8.96142ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 989: 9.366437ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 990: 9.809334ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 991: 9.541561ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 992: 11.143518ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 993: 10.821181ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 994: 9.855973ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 995: 10.941006ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 996: 9.596155ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 997: 9.121739ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 998: 9.090019ms
[2022-09-15T21:41:49Z INFO  lcli::block_root] Run 999: 9.071885ms
```

## Additional Info

Please provide any additional information. For example, future considerations
or information useful for reviewers.
2022-09-16 08:54:06 +00:00

178 lines
5.8 KiB
Rust

/// Contains the functions required for a `TreeHash` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_tree_hash {
($byte_size: expr) => {
fn tree_hash_type() -> tree_hash::TreeHashType {
tree_hash::TreeHashType::Vector
}
fn tree_hash_packed_encoding(&self) -> tree_hash::PackedEncoding {
unreachable!("Vector should never be packed.")
}
fn tree_hash_packing_factor() -> usize {
unreachable!("Vector should never be packed.")
}
fn tree_hash_root(&self) -> tree_hash::Hash256 {
// We could use the tree hash implementation for `FixedVec<u8, $byte_size>`,
// but benchmarks have show that to be at least 15% slower because of the
// unnecessary copying and allocation (one Vec per byte)
let values_per_chunk = tree_hash::BYTES_PER_CHUNK;
let minimum_chunk_count = ($byte_size + values_per_chunk - 1) / values_per_chunk;
tree_hash::merkle_root(&self.serialize(), minimum_chunk_count)
}
};
}
/// Contains the functions required for a `ssz::Encode` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_ssz_encode {
($byte_size: expr) => {
fn is_ssz_fixed_len() -> bool {
true
}
fn ssz_fixed_len() -> usize {
$byte_size
}
fn ssz_bytes_len(&self) -> usize {
$byte_size
}
fn ssz_append(&self, buf: &mut Vec<u8>) {
buf.extend_from_slice(&self.serialize())
}
};
}
/// Contains the functions required for a `ssz::Decode` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_ssz_decode {
($byte_size: expr) => {
fn is_ssz_fixed_len() -> bool {
true
}
fn ssz_fixed_len() -> usize {
$byte_size
}
fn from_ssz_bytes(bytes: &[u8]) -> Result<Self, ssz::DecodeError> {
let len = bytes.len();
let expected = <Self as ssz::Decode>::ssz_fixed_len();
if len != expected {
Err(ssz::DecodeError::InvalidByteLength { len, expected })
} else {
Self::deserialize(bytes)
.map_err(|e| ssz::DecodeError::BytesInvalid(format!("{:?}", e)))
}
}
};
}
/// Contains the functions required for a `fmt::Display` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_display {
() => {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", hex_encode(self.serialize().to_vec()))
}
};
}
/// Contains the functions required for a `fmt::Display` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_from_str {
() => {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Some(stripped) = s.strip_prefix("0x") {
let bytes = hex::decode(stripped).map_err(|e| e.to_string())?;
Self::deserialize(&bytes[..]).map_err(|e| format!("{:?}", e))
} else {
Err("must start with 0x".to_string())
}
}
};
}
/// Contains the functions required for a `serde::Serialize` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_serde_serialize {
() => {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&self.to_string())
}
};
}
/// Contains the functions required for a `serde::Deserialize` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_serde_deserialize {
() => {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
pub struct StringVisitor;
impl<'de> serde::de::Visitor<'de> for StringVisitor {
type Value = String;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a hex string with 0x prefix")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(value.to_string())
}
}
let string = deserializer.deserialize_str(StringVisitor)?;
<Self as std::str::FromStr>::from_str(&string).map_err(serde::de::Error::custom)
}
};
}
/// Contains the functions required for a `Debug` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
macro_rules! impl_debug {
() => {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", hex_encode(&self.serialize().to_vec()))
}
};
}
/// Contains the functions required for an `Arbitrary` implementation.
///
/// Does not include the `Impl` section since it gets very complicated when it comes to generics.
#[cfg(feature = "arbitrary")]
macro_rules! impl_arbitrary {
($byte_size: expr) => {
fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
let mut bytes = [0u8; $byte_size];
u.fill_buffer(&mut bytes)?;
Self::deserialize(&bytes).map_err(|_| arbitrary::Error::IncorrectFormat)
}
};
}