diff --git a/.gitignore b/.gitignore index 346ef9afa..6b8d4ab21 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ Cargo.lock *.pk *.sk *.raw_keypairs +flamegraph.svg +perf.data* diff --git a/Cargo.toml b/Cargo.toml index b419d32e4..c05e22286 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "eth2/types", "eth2/utils/bls", "eth2/utils/boolean-bitfield", + "eth2/utils/cached_tree_hash", "eth2/utils/hashing", "eth2/utils/honey-badger-split", "eth2/utils/merkle_proof", diff --git a/eth2/state_processing/src/per_slot_processing.rs b/eth2/state_processing/src/per_slot_processing.rs index a68f98c6d..8f9606723 100644 --- a/eth2/state_processing/src/per_slot_processing.rs +++ b/eth2/state_processing/src/per_slot_processing.rs @@ -1,5 +1,5 @@ use crate::*; -use tree_hash::{SignedRoot, TreeHash}; +use tree_hash::SignedRoot; use types::*; #[derive(Debug, PartialEq)] @@ -24,7 +24,7 @@ pub fn per_slot_processing(state: &mut BeaconState, spec: &ChainSpec) -> Result< } fn cache_state(state: &mut BeaconState, spec: &ChainSpec) -> Result<(), Error> { - let previous_slot_state_root = Hash256::from_slice(&state.tree_hash_root()[..]); + let previous_slot_state_root = state.update_tree_hash_cache()?; // Note: increment the state slot here to allow use of our `state_root` and `block_root` // getter/setter functions. 
diff --git a/eth2/types/Cargo.toml b/eth2/types/Cargo.toml index b88e1d4cf..36e251d7e 100644 --- a/eth2/types/Cargo.toml +++ b/eth2/types/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [dependencies] bls = { path = "../utils/bls" } boolean-bitfield = { path = "../utils/boolean-bitfield" } +cached_tree_hash = { path = "../utils/cached_tree_hash" } dirs = "1.0" derivative = "1.0" ethereum-types = "0.5" diff --git a/eth2/types/src/attestation.rs b/eth2/types/src/attestation.rs index f7bfdaab9..d1511763d 100644 --- a/eth2/types/src/attestation.rs +++ b/eth2/types/src/attestation.rs @@ -5,7 +5,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// Details an attestation that can be slashable. /// @@ -19,6 +19,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, )] @@ -58,4 +59,5 @@ mod tests { use super::*; ssz_tests!(Attestation); + cached_tree_hash_tests!(Attestation); } diff --git a/eth2/types/src/attestation_data.rs b/eth2/types/src/attestation_data.rs index f8a0ecd15..c963d6001 100644 --- a/eth2/types/src/attestation_data.rs +++ b/eth2/types/src/attestation_data.rs @@ -5,7 +5,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// The data upon which an attestation is based. 
/// @@ -21,6 +21,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, )] @@ -47,4 +48,5 @@ mod tests { use super::*; ssz_tests!(AttestationData); + cached_tree_hash_tests!(AttestationData); } diff --git a/eth2/types/src/attestation_data_and_custody_bit.rs b/eth2/types/src/attestation_data_and_custody_bit.rs index e5dc920dc..85a5875ab 100644 --- a/eth2/types/src/attestation_data_and_custody_bit.rs +++ b/eth2/types/src/attestation_data_and_custody_bit.rs @@ -3,12 +3,12 @@ use crate::test_utils::TestRandom; use rand::RngCore; use serde_derive::Serialize; use ssz_derive::{Decode, Encode}; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Used for pairing an attestation with a proof-of-custody. /// /// Spec v0.5.1 -#[derive(Debug, Clone, PartialEq, Default, Serialize, Encode, Decode, TreeHash)] +#[derive(Debug, Clone, PartialEq, Default, Serialize, Encode, Decode, TreeHash, CachedTreeHash)] pub struct AttestationDataAndCustodyBit { pub data: AttestationData, pub custody_bit: bool, @@ -28,4 +28,5 @@ mod test { use super::*; ssz_tests!(AttestationDataAndCustodyBit); + cached_tree_hash_tests!(AttestationDataAndCustodyBit); } diff --git a/eth2/types/src/attester_slashing.rs b/eth2/types/src/attester_slashing.rs index b5e851dbd..d4848b01c 100644 --- a/eth2/types/src/attester_slashing.rs +++ b/eth2/types/src/attester_slashing.rs @@ -3,12 +3,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Two conflicting attestations. 
/// /// Spec v0.5.1 -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + PartialEq, + Clone, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct AttesterSlashing { pub slashable_attestation_1: SlashableAttestation, pub slashable_attestation_2: SlashableAttestation, @@ -19,4 +30,5 @@ mod tests { use super::*; ssz_tests!(AttesterSlashing); + cached_tree_hash_tests!(AttesterSlashing); } diff --git a/eth2/types/src/beacon_block.rs b/eth2/types/src/beacon_block.rs index b4d2752d6..d198d16fc 100644 --- a/eth2/types/src/beacon_block.rs +++ b/eth2/types/src/beacon_block.rs @@ -6,7 +6,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// A block of the `BeaconChain`. /// @@ -20,6 +20,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, )] @@ -100,4 +101,5 @@ mod tests { use super::*; ssz_tests!(BeaconBlock); + cached_tree_hash_tests!(BeaconBlock); } diff --git a/eth2/types/src/beacon_block_body.rs b/eth2/types/src/beacon_block_body.rs index de4951f1f..15ba00d6b 100644 --- a/eth2/types/src/beacon_block_body.rs +++ b/eth2/types/src/beacon_block_body.rs @@ -4,12 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// The body of a `BeaconChain` block, containing operations. 
/// /// Spec v0.5.1 -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + PartialEq, + Clone, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct BeaconBlockBody { pub randao_reveal: Signature, pub eth1_data: Eth1Data, @@ -26,4 +37,5 @@ mod tests { use super::*; ssz_tests!(BeaconBlockBody); + cached_tree_hash_tests!(BeaconBlockBody); } diff --git a/eth2/types/src/beacon_block_header.rs b/eth2/types/src/beacon_block_header.rs index e4db3a721..5b35da1b6 100644 --- a/eth2/types/src/beacon_block_header.rs +++ b/eth2/types/src/beacon_block_header.rs @@ -6,7 +6,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::{SignedRoot, TreeHash}; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// A header of a `BeaconBlock`. /// @@ -20,6 +20,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, )] @@ -59,4 +60,5 @@ mod tests { use super::*; ssz_tests!(BeaconBlockHeader); + cached_tree_hash_tests!(BeaconBlockHeader); } diff --git a/eth2/types/src/beacon_state.rs b/eth2/types/src/beacon_state.rs index eef408308..e9b052f99 100644 --- a/eth2/types/src/beacon_state.rs +++ b/eth2/types/src/beacon_state.rs @@ -1,6 +1,7 @@ use self::epoch_cache::{get_active_validator_indices, EpochCache, Error as EpochCacheError}; use crate::test_utils::TestRandom; use crate::*; +use cached_tree_hash::{Error as TreeHashCacheError, TreeHashCache}; use int_to_bytes::int_to_bytes32; use pubkey_cache::PubkeyCache; use rand::RngCore; @@ -9,7 +10,7 @@ use ssz::{hash, ssz_encode}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; mod epoch_cache; mod 
pubkey_cache; @@ -42,12 +43,24 @@ pub enum Error { EpochCacheUninitialized(RelativeEpoch), RelativeEpochError(RelativeEpochError), EpochCacheError(EpochCacheError), + TreeHashCacheError(TreeHashCacheError), } /// The state of the `BeaconChain` at some slot. /// /// Spec v0.5.1 -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, TestRandom, Encode, Decode, TreeHash)] +#[derive( + Debug, + PartialEq, + Clone, + Serialize, + Deserialize, + TestRandom, + Encode, + Decode, + TreeHash, + CachedTreeHash, +)] pub struct BeaconState { // Misc pub slot: Slot, @@ -112,6 +125,12 @@ pub struct BeaconState { #[tree_hash(skip_hashing)] #[test_random(default)] pub pubkey_cache: PubkeyCache, + #[serde(skip_serializing, skip_deserializing)] + #[ssz(skip_serializing)] + #[ssz(skip_deserializing)] + #[tree_hash(skip_hashing)] + #[test_random(default)] + pub tree_hash_cache: TreeHashCache, } impl BeaconState { @@ -187,6 +206,7 @@ impl BeaconState { EpochCache::default(), ], pubkey_cache: PubkeyCache::default(), + tree_hash_cache: TreeHashCache::default(), } } @@ -672,6 +692,7 @@ impl BeaconState { self.build_epoch_cache(RelativeEpoch::NextWithoutRegistryChange, spec)?; self.build_epoch_cache(RelativeEpoch::NextWithRegistryChange, spec)?; self.update_pubkey_cache()?; + self.update_tree_hash_cache()?; Ok(()) } @@ -778,6 +799,39 @@ impl BeaconState { pub fn drop_pubkey_cache(&mut self) { self.pubkey_cache = PubkeyCache::default() } + + /// Update the tree hash cache, building it for the first time if it is empty. + /// + /// Returns the `tree_hash_root` resulting from the update. This root can be considered the + /// canonical root of `self`. + pub fn update_tree_hash_cache(&mut self) -> Result { + if self.tree_hash_cache.is_empty() { + self.tree_hash_cache = TreeHashCache::new(self)?; + } else { + // Move the cache outside of `self` to satisfy the borrow checker. 
+ let mut cache = std::mem::replace(&mut self.tree_hash_cache, TreeHashCache::default()); + + cache.update(self)?; + + // Move the updated cache back into `self`. + self.tree_hash_cache = cache + } + + self.cached_tree_hash_root() + } + + /// Returns the tree hash root determined by the last execution of `self.update_tree_hash_cache(..)`. + /// + /// Note: does _not_ update the cache and may return an outdated root. + /// + /// Returns an error if the cache is not initialized or if an error is encountered during the + /// cache update. + pub fn cached_tree_hash_root(&self) -> Result { + self.tree_hash_cache + .tree_hash_root() + .and_then(|b| Ok(Hash256::from_slice(b))) + .map_err(|e| e.into()) + } } impl From for Error { @@ -791,3 +845,9 @@ impl From for Error { Error::EpochCacheError(e) } } + +impl From for Error { + fn from(e: TreeHashCacheError) -> Error { + Error::TreeHashCacheError(e) + } +} diff --git a/eth2/types/src/beacon_state/tests.rs b/eth2/types/src/beacon_state/tests.rs index dc16a013b..d5862559a 100644 --- a/eth2/types/src/beacon_state/tests.rs +++ b/eth2/types/src/beacon_state/tests.rs @@ -3,6 +3,7 @@ use super::*; use crate::test_utils::*; ssz_tests!(BeaconState); +cached_tree_hash_tests!(BeaconState); /// Test that /// @@ -55,3 +56,22 @@ fn cache_initialization() { test_cache_initialization(&mut state, RelativeEpoch::NextWithRegistryChange, &spec); test_cache_initialization(&mut state, RelativeEpoch::NextWithoutRegistryChange, &spec); } + +#[test] +fn tree_hash_cache() { + use crate::test_utils::{SeedableRng, TestRandom, XorShiftRng}; + use tree_hash::TreeHash; + + let mut rng = XorShiftRng::from_seed([42; 16]); + + let mut state = BeaconState::random_for_test(&mut rng); + + let root = state.update_tree_hash_cache().unwrap(); + + assert_eq!(root.as_bytes(), &state.tree_hash_root()[..]); + + state.slot = state.slot + 1; + + let root = state.update_tree_hash_cache().unwrap(); + assert_eq!(root.as_bytes(), &state.tree_hash_root()[..]); +} diff --git 
a/eth2/types/src/crosslink.rs b/eth2/types/src/crosslink.rs index 623226ad6..448f5ea30 100644 --- a/eth2/types/src/crosslink.rs +++ b/eth2/types/src/crosslink.rs @@ -4,7 +4,7 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Specifies the block hash for a shard at an epoch. /// @@ -20,6 +20,7 @@ use tree_hash_derive::TreeHash; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, )] pub struct Crosslink { @@ -32,4 +33,5 @@ mod tests { use super::*; ssz_tests!(Crosslink); + cached_tree_hash_tests!(Crosslink); } diff --git a/eth2/types/src/crosslink_committee.rs b/eth2/types/src/crosslink_committee.rs index e8fc1b96d..25c42c07b 100644 --- a/eth2/types/src/crosslink_committee.rs +++ b/eth2/types/src/crosslink_committee.rs @@ -1,9 +1,20 @@ use crate::*; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; -#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize, Decode, Encode, TreeHash)] +#[derive( + Default, + Clone, + Debug, + PartialEq, + Serialize, + Deserialize, + Decode, + Encode, + TreeHash, + CachedTreeHash, +)] pub struct CrosslinkCommittee { pub slot: Slot, pub shard: Shard, diff --git a/eth2/types/src/deposit.rs b/eth2/types/src/deposit.rs index 291173d34..e8d2f5a4b 100644 --- a/eth2/types/src/deposit.rs +++ b/eth2/types/src/deposit.rs @@ -4,12 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// A deposit to potentially become a beacon chain validator. 
/// /// Spec v0.5.1 -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + PartialEq, + Clone, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct Deposit { pub proof: TreeHashVector, pub index: u64, @@ -21,4 +32,5 @@ mod tests { use super::*; ssz_tests!(Deposit); + cached_tree_hash_tests!(Deposit); } diff --git a/eth2/types/src/deposit_data.rs b/eth2/types/src/deposit_data.rs index bc96ac7c4..38c44d1a7 100644 --- a/eth2/types/src/deposit_data.rs +++ b/eth2/types/src/deposit_data.rs @@ -4,12 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Data generated by the deposit contract. /// /// Spec v0.5.1 -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + PartialEq, + Clone, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct DepositData { pub amount: u64, pub timestamp: u64, @@ -21,4 +32,5 @@ mod tests { use super::*; ssz_tests!(DepositData); + cached_tree_hash_tests!(DepositData); } diff --git a/eth2/types/src/deposit_input.rs b/eth2/types/src/deposit_input.rs index be2106cb4..af1049a20 100644 --- a/eth2/types/src/deposit_input.rs +++ b/eth2/types/src/deposit_input.rs @@ -6,7 +6,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::{SignedRoot, TreeHash}; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// The data supplied by the user to the deposit contract. 
/// @@ -21,6 +21,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Decode, SignedRoot, TreeHash, + CachedTreeHash, TestRandom, )] pub struct DepositInput { @@ -68,6 +69,7 @@ mod tests { use super::*; ssz_tests!(DepositInput); + cached_tree_hash_tests!(DepositInput); #[test] fn can_create_and_validate() { diff --git a/eth2/types/src/eth1_data.rs b/eth2/types/src/eth1_data.rs index 2ad460d13..3c0c3af02 100644 --- a/eth2/types/src/eth1_data.rs +++ b/eth2/types/src/eth1_data.rs @@ -4,13 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Contains data obtained from the Eth1 chain. /// /// Spec v0.5.1 #[derive( - Debug, PartialEq, Clone, Default, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom, + Debug, + PartialEq, + Clone, + Default, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, )] pub struct Eth1Data { pub deposit_root: Hash256, @@ -22,4 +32,5 @@ mod tests { use super::*; ssz_tests!(Eth1Data); + cached_tree_hash_tests!(Eth1Data); } diff --git a/eth2/types/src/eth1_data_vote.rs b/eth2/types/src/eth1_data_vote.rs index 7a77c8ff0..00818ebf4 100644 --- a/eth2/types/src/eth1_data_vote.rs +++ b/eth2/types/src/eth1_data_vote.rs @@ -4,13 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// A summation of votes for some `Eth1Data`. 
/// /// Spec v0.5.1 #[derive( - Debug, PartialEq, Clone, Default, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom, + Debug, + PartialEq, + Clone, + Default, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, )] pub struct Eth1DataVote { pub eth1_data: Eth1Data, @@ -22,4 +32,5 @@ mod tests { use super::*; ssz_tests!(Eth1DataVote); + cached_tree_hash_tests!(Eth1DataVote); } diff --git a/eth2/types/src/fork.rs b/eth2/types/src/fork.rs index d99842855..83d4f5dc6 100644 --- a/eth2/types/src/fork.rs +++ b/eth2/types/src/fork.rs @@ -7,13 +7,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Specifies a fork of the `BeaconChain`, to prevent replay attacks. /// /// Spec v0.5.1 #[derive( - Debug, Clone, PartialEq, Default, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom, + Debug, + Clone, + PartialEq, + Default, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, )] pub struct Fork { #[serde(deserialize_with = "fork_from_hex_str")] @@ -54,6 +64,7 @@ mod tests { use super::*; ssz_tests!(Fork); + cached_tree_hash_tests!(Fork); fn test_genesis(version: u32, epoch: Epoch) { let mut spec = ChainSpec::foundation(); diff --git a/eth2/types/src/historical_batch.rs b/eth2/types/src/historical_batch.rs index c4f62fcfc..13e57131a 100644 --- a/eth2/types/src/historical_batch.rs +++ b/eth2/types/src/historical_batch.rs @@ -4,12 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Historical block and state roots. 
/// /// Spec v0.5.1 -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + Clone, + PartialEq, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct HistoricalBatch { pub block_roots: TreeHashVector, pub state_roots: TreeHashVector, @@ -20,4 +31,5 @@ mod tests { use super::*; ssz_tests!(HistoricalBatch); + cached_tree_hash_tests!(HistoricalBatch); } diff --git a/eth2/types/src/pending_attestation.rs b/eth2/types/src/pending_attestation.rs index ce9ce3d77..b71351f9a 100644 --- a/eth2/types/src/pending_attestation.rs +++ b/eth2/types/src/pending_attestation.rs @@ -4,12 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// An attestation that has been included in the state but not yet fully processed. /// /// Spec v0.5.1 -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + Clone, + PartialEq, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct PendingAttestation { pub aggregation_bitfield: Bitfield, pub data: AttestationData, @@ -34,4 +45,5 @@ mod tests { use super::*; ssz_tests!(PendingAttestation); + cached_tree_hash_tests!(PendingAttestation); } diff --git a/eth2/types/src/proposer_slashing.rs b/eth2/types/src/proposer_slashing.rs index a3501a5bd..bf26ae508 100644 --- a/eth2/types/src/proposer_slashing.rs +++ b/eth2/types/src/proposer_slashing.rs @@ -4,12 +4,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Two conflicting proposals from the same proposer (validator). 
/// /// Spec v0.5.1 -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, Encode, Decode, TreeHash, TestRandom)] +#[derive( + Debug, + PartialEq, + Clone, + Serialize, + Deserialize, + Encode, + Decode, + TreeHash, + CachedTreeHash, + TestRandom, +)] pub struct ProposerSlashing { pub proposer_index: u64, pub header_1: BeaconBlockHeader, @@ -21,4 +32,5 @@ mod tests { use super::*; ssz_tests!(ProposerSlashing); + cached_tree_hash_tests!(ProposerSlashing); } diff --git a/eth2/types/src/slashable_attestation.rs b/eth2/types/src/slashable_attestation.rs index 9c460e482..fb838e0c4 100644 --- a/eth2/types/src/slashable_attestation.rs +++ b/eth2/types/src/slashable_attestation.rs @@ -4,7 +4,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// Details an attestation that can be slashable. /// @@ -20,6 +20,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, )] @@ -133,6 +134,7 @@ mod tests { } ssz_tests!(SlashableAttestation); + cached_tree_hash_tests!(SlashableAttestation); fn create_slashable_attestation( slot_factor: u64, diff --git a/eth2/types/src/slot_epoch_macros.rs b/eth2/types/src/slot_epoch_macros.rs index b3ca5c4bc..4a48bba9f 100644 --- a/eth2/types/src/slot_epoch_macros.rs +++ b/eth2/types/src/slot_epoch_macros.rs @@ -224,6 +224,26 @@ macro_rules! 
impl_ssz { } } + impl cached_tree_hash::CachedTreeHash for $type { + fn new_tree_hash_cache( + &self, + depth: usize, + ) -> Result { + self.0.new_tree_hash_cache(depth) + } + + fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { + self.0.tree_hash_cache_schema(depth) + } + + fn update_tree_hash_cache( + &self, + cache: &mut cached_tree_hash::TreeHashCache, + ) -> Result<(), cached_tree_hash::Error> { + self.0.update_tree_hash_cache(cache) + } + } + impl TestRandom for $type { fn random_for_test(rng: &mut T) -> Self { $type::from(u64::random_for_test(rng)) @@ -545,6 +565,7 @@ macro_rules! all_tests { math_between_tests!($type, $type); math_tests!($type); ssz_tests!($type); + cached_tree_hash_tests!($type); mod u64_tests { use super::*; diff --git a/eth2/types/src/test_utils/macros.rs b/eth2/types/src/test_utils/macros.rs index d5711e96e..71f462c1a 100644 --- a/eth2/types/src/test_utils/macros.rs +++ b/eth2/types/src/test_utils/macros.rs @@ -32,3 +32,51 @@ macro_rules! ssz_tests { } }; } + +#[cfg(test)] +#[macro_export] +macro_rules! cached_tree_hash_tests { + ($type: ident) => { + #[test] + pub fn test_cached_tree_hash() { + use crate::test_utils::{SeedableRng, TestRandom, XorShiftRng}; + use tree_hash::TreeHash; + + let mut rng = XorShiftRng::from_seed([42; 16]); + + // Test the original hash + let original = $type::random_for_test(&mut rng); + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root(), + "Original hash failed." + ); + + // Test the updated hash + let modified = $type::random_for_test(&mut rng); + cache.update(&modified).unwrap(); + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root(), + "Modification hash failed" + ); + + // Produce a new cache for the modified object and compare it to the updated cache. 
+ let mut modified_cache = cached_tree_hash::TreeHashCache::new(&modified).unwrap(); + + // Reset the caches. + cache.reset_modifications(); + modified_cache.reset_modifications(); + + // Ensure the modified cache is the same as a newly created cache. This is a sanity + // check to make sure there are no artifacts of the original cache remaining after an + // update. + assert_eq!( + modified_cache, cache, + "The modified cache does not match a new cache." + ) + } + }; +} diff --git a/eth2/types/src/test_utils/test_random.rs b/eth2/types/src/test_utils/test_random.rs index cb7abe3a4..2d4269b08 100644 --- a/eth2/types/src/test_utils/test_random.rs +++ b/eth2/types/src/test_utils/test_random.rs @@ -44,11 +44,13 @@ where U: TestRandom, { fn random_for_test(rng: &mut T) -> Self { - vec![ - ::random_for_test(rng), - ::random_for_test(rng), - ::random_for_test(rng), - ] + let mut output = vec![]; + + for _ in 0..(usize::random_for_test(rng) % 4) { + output.push(::random_for_test(rng)); + } + + output } } diff --git a/eth2/types/src/transfer.rs b/eth2/types/src/transfer.rs index 82ead03d5..aea13fdd7 100644 --- a/eth2/types/src/transfer.rs +++ b/eth2/types/src/transfer.rs @@ -7,7 +7,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// The data submitted to the deposit contract. 
/// @@ -20,6 +20,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, Derivative, @@ -42,4 +43,5 @@ mod tests { use super::*; ssz_tests!(Transfer); + cached_tree_hash_tests!(Transfer); } diff --git a/eth2/types/src/tree_hash_vector.rs b/eth2/types/src/tree_hash_vector.rs index 1cc8e40a5..42a730f25 100644 --- a/eth2/types/src/tree_hash_vector.rs +++ b/eth2/types/src/tree_hash_vector.rs @@ -1,4 +1,5 @@ use crate::test_utils::{RngCore, TestRandom}; +use cached_tree_hash::CachedTreeHash; use serde_derive::{Deserialize, Serialize}; use ssz::{Decodable, DecodeError, Encodable, SszStream}; use std::ops::{Deref, DerefMut}; @@ -50,7 +51,34 @@ where } fn tree_hash_root(&self) -> Vec { - tree_hash::standard_tree_hash::vec_tree_hash_root(self) + tree_hash::impls::vec_tree_hash_root(self) + } +} + +impl CachedTreeHash for TreeHashVector +where + T: CachedTreeHash + TreeHash, +{ + fn new_tree_hash_cache( + &self, + depth: usize, + ) -> Result { + let (cache, _overlay) = cached_tree_hash::vec::new_tree_hash_cache(self, depth)?; + + Ok(cache) + } + + fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { + cached_tree_hash::vec::produce_schema(self, depth) + } + + fn update_tree_hash_cache( + &self, + cache: &mut cached_tree_hash::TreeHashCache, + ) -> Result<(), cached_tree_hash::Error> { + cached_tree_hash::vec::update_tree_hash_cache(self, cache)?; + + Ok(()) } } @@ -77,6 +105,38 @@ where U: TestRandom, { fn random_for_test(rng: &mut T) -> Self { - Vec::random_for_test(rng).into() + TreeHashVector::from(vec![ + U::random_for_test(rng), + U::random_for_test(rng), + U::random_for_test(rng), + ]) } } + +#[cfg(test)] +mod test { + use super::*; + use tree_hash::TreeHash; + + #[test] + pub fn test_cached_tree_hash() { + let original = TreeHashVector::from(vec![1_u64, 2, 3, 4]); + + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); + + assert_eq!( + 
cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); + + let modified = TreeHashVector::from(vec![1_u64, 1, 1, 1]); + + cache.update(&modified).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); + } + +} diff --git a/eth2/types/src/validator.rs b/eth2/types/src/validator.rs index bbd68ed2b..a20eb6426 100644 --- a/eth2/types/src/validator.rs +++ b/eth2/types/src/validator.rs @@ -3,12 +3,23 @@ use rand::RngCore; use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; -use tree_hash_derive::TreeHash; +use tree_hash_derive::{CachedTreeHash, TreeHash}; /// Information about a `BeaconChain` validator. /// /// Spec v0.5.1 -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode, TestRandom, TreeHash)] +#[derive( + Debug, + Clone, + PartialEq, + Serialize, + Deserialize, + Encode, + Decode, + TestRandom, + TreeHash, + CachedTreeHash, +)] pub struct Validator { pub pubkey: PublicKey, pub withdrawal_credentials: Hash256, @@ -111,4 +122,5 @@ mod tests { } ssz_tests!(Validator); + cached_tree_hash_tests!(Validator); } diff --git a/eth2/types/src/voluntary_exit.rs b/eth2/types/src/voluntary_exit.rs index cb872cb98..8a780db75 100644 --- a/eth2/types/src/voluntary_exit.rs +++ b/eth2/types/src/voluntary_exit.rs @@ -5,7 +5,7 @@ use serde_derive::{Deserialize, Serialize}; use ssz_derive::{Decode, Encode}; use test_random_derive::TestRandom; use tree_hash::TreeHash; -use tree_hash_derive::{SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; /// An exit voluntarily submitted a validator who wishes to withdraw. 
/// @@ -19,6 +19,7 @@ use tree_hash_derive::{SignedRoot, TreeHash}; Encode, Decode, TreeHash, + CachedTreeHash, TestRandom, SignedRoot, )] @@ -34,4 +35,5 @@ mod tests { use super::*; ssz_tests!(VoluntaryExit); + cached_tree_hash_tests!(VoluntaryExit); } diff --git a/eth2/utils/bls/Cargo.toml b/eth2/utils/bls/Cargo.toml index 4ce499580..dcace15c8 100644 --- a/eth2/utils/bls/Cargo.toml +++ b/eth2/utils/bls/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" [dependencies] bls-aggregates = { git = "https://github.com/sigp/signature-schemes", tag = "0.6.1" } +cached_tree_hash = { path = "../cached_tree_hash" } hashing = { path = "../hashing" } hex = "0.3" serde = "1.0" diff --git a/eth2/utils/bls/src/aggregate_signature.rs b/eth2/utils/bls/src/aggregate_signature.rs index 0fbcc3493..e6c6cff9a 100644 --- a/eth2/utils/bls/src/aggregate_signature.rs +++ b/eth2/utils/bls/src/aggregate_signature.rs @@ -2,6 +2,7 @@ use super::{AggregatePublicKey, Signature, BLS_AGG_SIG_BYTE_SIZE}; use bls_aggregates::{ AggregatePublicKey as RawAggregatePublicKey, AggregateSignature as RawAggregateSignature, }; +use cached_tree_hash::cached_tree_hash_ssz_encoding_as_vector; use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; use serde_hex::{encode as hex_encode, HexVisitor}; @@ -167,6 +168,7 @@ impl<'de> Deserialize<'de> for AggregateSignature { } tree_hash_ssz_encoding_as_vector!(AggregateSignature); +cached_tree_hash_ssz_encoding_as_vector!(AggregateSignature, 96); #[cfg(test)] mod tests { diff --git a/eth2/utils/bls/src/fake_aggregate_signature.rs b/eth2/utils/bls/src/fake_aggregate_signature.rs index f201eba3e..aeb89507d 100644 --- a/eth2/utils/bls/src/fake_aggregate_signature.rs +++ b/eth2/utils/bls/src/fake_aggregate_signature.rs @@ -1,4 +1,5 @@ use super::{fake_signature::FakeSignature, AggregatePublicKey, BLS_AGG_SIG_BYTE_SIZE}; +use cached_tree_hash::cached_tree_hash_ssz_encoding_as_vector; use serde::de::{Deserialize, Deserializer}; use 
serde::ser::{Serialize, Serializer}; use serde_hex::{encode as hex_encode, PrefixedHexVisitor}; @@ -100,6 +101,7 @@ impl<'de> Deserialize<'de> for FakeAggregateSignature { } tree_hash_ssz_encoding_as_vector!(FakeAggregateSignature); +cached_tree_hash_ssz_encoding_as_vector!(FakeAggregateSignature, 96); #[cfg(test)] mod tests { diff --git a/eth2/utils/bls/src/fake_signature.rs b/eth2/utils/bls/src/fake_signature.rs index 3208ed992..8a333b9c0 100644 --- a/eth2/utils/bls/src/fake_signature.rs +++ b/eth2/utils/bls/src/fake_signature.rs @@ -1,4 +1,5 @@ use super::{PublicKey, SecretKey, BLS_SIG_BYTE_SIZE}; +use cached_tree_hash::cached_tree_hash_ssz_encoding_as_vector; use hex::encode as hex_encode; use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; @@ -75,6 +76,7 @@ impl Decodable for FakeSignature { } tree_hash_ssz_encoding_as_vector!(FakeSignature); +cached_tree_hash_ssz_encoding_as_vector!(FakeSignature, 96); impl Serialize for FakeSignature { fn serialize(&self, serializer: S) -> Result diff --git a/eth2/utils/bls/src/public_key.rs b/eth2/utils/bls/src/public_key.rs index dcbbc622a..41b87d383 100644 --- a/eth2/utils/bls/src/public_key.rs +++ b/eth2/utils/bls/src/public_key.rs @@ -1,5 +1,6 @@ use super::{SecretKey, BLS_PUBLIC_KEY_BYTE_SIZE}; use bls_aggregates::PublicKey as RawPublicKey; +use cached_tree_hash::cached_tree_hash_ssz_encoding_as_vector; use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; use serde_hex::{encode as hex_encode, HexVisitor}; @@ -106,6 +107,7 @@ impl<'de> Deserialize<'de> for PublicKey { } tree_hash_ssz_encoding_as_vector!(PublicKey); +cached_tree_hash_ssz_encoding_as_vector!(PublicKey, 48); impl PartialEq for PublicKey { fn eq(&self, other: &PublicKey) -> bool { @@ -129,6 +131,7 @@ impl Hash for PublicKey { mod tests { use super::*; use ssz::ssz_encode; + use tree_hash::TreeHash; #[test] pub fn test_ssz_round_trip() { @@ -140,4 +143,27 @@ mod tests { assert_eq!(original, 
decoded); } + + #[test] + pub fn test_cached_tree_hash() { + let sk = SecretKey::random(); + let original = PublicKey::from_secret_key(&sk); + + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); + + let sk = SecretKey::random(); + let modified = PublicKey::from_secret_key(&sk); + + cache.update(&modified).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); + } } diff --git a/eth2/utils/bls/src/signature.rs b/eth2/utils/bls/src/signature.rs index 3fb68dc53..e2dbd9c27 100644 --- a/eth2/utils/bls/src/signature.rs +++ b/eth2/utils/bls/src/signature.rs @@ -1,5 +1,6 @@ use super::{PublicKey, SecretKey, BLS_SIG_BYTE_SIZE}; use bls_aggregates::Signature as RawSignature; +use cached_tree_hash::cached_tree_hash_ssz_encoding_as_vector; use hex::encode as hex_encode; use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; @@ -116,6 +117,7 @@ impl Decodable for Signature { } tree_hash_ssz_encoding_as_vector!(Signature); +cached_tree_hash_ssz_encoding_as_vector!(Signature, 96); impl Serialize for Signature { /// Serde serialization is compliant the Ethereum YAML test format. 
@@ -145,6 +147,7 @@ mod tests { use super::super::Keypair; use super::*; use ssz::ssz_encode; + use tree_hash::TreeHash; #[test] pub fn test_ssz_round_trip() { @@ -158,6 +161,28 @@ mod tests { assert_eq!(original, decoded); } + #[test] + pub fn test_cached_tree_hash() { + let keypair = Keypair::random(); + let original = Signature::new(&[42, 42], 0, &keypair.sk); + + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); + + let modified = Signature::new(&[99, 99], 0, &keypair.sk); + + cache.update(&modified).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); + } + #[test] pub fn test_empty_signature() { let sig = Signature::empty_signature(); diff --git a/eth2/utils/boolean-bitfield/Cargo.toml b/eth2/utils/boolean-bitfield/Cargo.toml index 61bbc60a8..dfc97ce77 100644 --- a/eth2/utils/boolean-bitfield/Cargo.toml +++ b/eth2/utils/boolean-bitfield/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Paul Hauner "] edition = "2018" [dependencies] +cached_tree_hash = { path = "../cached_tree_hash" } serde_hex = { path = "../serde_hex" } ssz = { path = "../ssz" } bit-vec = "0.5.0" diff --git a/eth2/utils/boolean-bitfield/src/lib.rs b/eth2/utils/boolean-bitfield/src/lib.rs index 1d0f1c02e..d49da0d10 100644 --- a/eth2/utils/boolean-bitfield/src/lib.rs +++ b/eth2/utils/boolean-bitfield/src/lib.rs @@ -3,6 +3,7 @@ extern crate ssz; use bit_reverse::LookupReverse; use bit_vec::BitVec; +use cached_tree_hash::cached_tree_hash_bytes_as_list; use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; use serde_hex::{encode, PrefixedHexVisitor}; @@ -270,11 +271,35 @@ impl tree_hash::TreeHash for BooleanBitfield { } } +cached_tree_hash_bytes_as_list!(BooleanBitfield); + #[cfg(test)] mod tests { use super::*; use serde_yaml; use ssz::{decode, ssz_encode, SszStream}; + use tree_hash::TreeHash; + + #[test] + pub 
fn test_cached_tree_hash() { + let original = BooleanBitfield::from_bytes(&vec![18; 12][..]); + + let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + original.tree_hash_root() + ); + + let modified = BooleanBitfield::from_bytes(&vec![2; 1][..]); + + cache.update(&modified).unwrap(); + + assert_eq!( + cache.tree_hash_root().unwrap().to_vec(), + modified.tree_hash_root() + ); + } #[test] fn test_new_bitfield() { diff --git a/eth2/utils/cached_tree_hash/Cargo.toml b/eth2/utils/cached_tree_hash/Cargo.toml new file mode 100644 index 000000000..7b331ad68 --- /dev/null +++ b/eth2/utils/cached_tree_hash/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "cached_tree_hash" +version = "0.1.0" +authors = ["Paul Hauner "] +edition = "2018" + +[[bench]] +name = "benches" +harness = false + +[dev-dependencies] +criterion = "0.2" +tree_hash_derive = { path = "../tree_hash_derive" } + +[dependencies] +tree_hash = { path = "../tree_hash" } +ethereum-types = "0.5" +hashing = { path = "../hashing" } +int_to_bytes = { path = "../int_to_bytes" } diff --git a/eth2/utils/cached_tree_hash/README.md b/eth2/utils/cached_tree_hash/README.md new file mode 100644 index 000000000..0498bfc3e --- /dev/null +++ b/eth2/utils/cached_tree_hash/README.md @@ -0,0 +1,76 @@ +# Tree hashing + +Provides both cached and non-cached tree hashing methods. 
+ +## Standard Tree Hash + +```rust +use tree_hash_derive::TreeHash; + +#[derive(TreeHash)] +struct Foo { + a: u64, + b: Vec, +} + +fn main() { + let foo = Foo { + a: 42, + b: vec![1, 2, 3] + }; + + println!("root: {}", foo.tree_hash_root()); +} +``` + +## Cached Tree Hash + + +```rust +use tree_hash_derive::{TreeHash, CachedTreeHash}; + +#[derive(TreeHash, CachedTreeHash)] +struct Foo { + a: u64, + b: Vec, +} + +#[derive(TreeHash, CachedTreeHash)] +struct Bar { + a: Vec, + b: u64, +} + +fn main() { + let bar = Bar { + a: vec![ + Foo { + a: 42, + b: vec![1, 2, 3] + } + ], + b: 42 + }; + + let modified_bar = Bar { + a: vec![ + Foo { + a: 100, + b: vec![1, 2, 3, 4, 5, 6] + }, + Foo { + a: 42, + b: vec![] + } + ], + b: 99 + }; + + + let mut hasher = CachedTreeHasher::new(&bar).unwrap(); + hasher.update(&modified_bar).unwrap(); + + // Assert that the cached tree hash matches a standard tree hash. + assert_eq!(hasher.tree_hash_root(), modified_bar.tree_hash_root()); +} +``` diff --git a/eth2/utils/cached_tree_hash/benches/benches.rs b/eth2/utils/cached_tree_hash/benches/benches.rs new file mode 100644 index 000000000..be7e26bb5 --- /dev/null +++ b/eth2/utils/cached_tree_hash/benches/benches.rs @@ -0,0 +1,73 @@ +#[macro_use] +extern crate criterion; + +use cached_tree_hash::TreeHashCache; +use criterion::black_box; +use criterion::{Benchmark, Criterion}; +use ethereum_types::H256 as Hash256; +use hashing::hash; +use tree_hash::TreeHash; + +fn criterion_benchmark(c: &mut Criterion) { + let n = 1024; + + let source_vec: Vec = (0..n).map(|_| Hash256::random()).collect(); + + let mut source_modified_vec = source_vec.clone(); + source_modified_vec[n - 1] = Hash256::random(); + + let modified_vec = source_modified_vec.clone(); + c.bench( + &format!("vec_of_{}_hashes", n), + Benchmark::new("standard", move |b| { + b.iter_with_setup( + || modified_vec.clone(), + |modified_vec| black_box(modified_vec.tree_hash_root()), + ) + }) + .sample_size(100), + ); + + let modified_vec = 
source_modified_vec.clone(); + c.bench( + &format!("vec_of_{}_hashes", n), + Benchmark::new("build_cache", move |b| { + b.iter_with_setup( + || modified_vec.clone(), + |vec| black_box(TreeHashCache::new(&vec, 0)), + ) + }) + .sample_size(100), + ); + + let vec = source_vec.clone(); + let modified_vec = source_modified_vec.clone(); + c.bench( + &format!("vec_of_{}_hashes", n), + Benchmark::new("cache_update", move |b| { + b.iter_with_setup( + || { + let cache = TreeHashCache::new(&vec, 0).unwrap(); + (cache, modified_vec.clone()) + }, + |(mut cache, modified_vec)| black_box(cache.update(&modified_vec)), + ) + }) + .sample_size(100), + ); + + c.bench( + &format!("{}_hashes", n), + Benchmark::new("hash_64_bytes", move |b| { + b.iter(|| { + for _ in 0..n { + let _digest = hash(&[42; 64]); + } + }) + }) + .sample_size(100), + ); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs b/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs new file mode 100644 index 000000000..1e67571d5 --- /dev/null +++ b/eth2/utils/cached_tree_hash/examples/8k_hashes_cached.rs @@ -0,0 +1,21 @@ +use cached_tree_hash::TreeHashCache; +use ethereum_types::H256 as Hash256; + +fn run(vec: &Vec, modified_vec: &Vec) { + let mut cache = TreeHashCache::new(vec).unwrap(); + + cache.update(modified_vec).unwrap(); +} + +fn main() { + let n = 2048; + + let vec: Vec = (0..n).map(|_| Hash256::random()).collect(); + + let mut modified_vec = vec.clone(); + modified_vec[n - 1] = Hash256::random(); + + for _ in 0..10_000 { + run(&vec, &modified_vec); + } +} diff --git a/eth2/utils/cached_tree_hash/examples/8k_hashes_standard.rs b/eth2/utils/cached_tree_hash/examples/8k_hashes_standard.rs new file mode 100644 index 000000000..bcbb392e2 --- /dev/null +++ b/eth2/utils/cached_tree_hash/examples/8k_hashes_standard.rs @@ -0,0 +1,10 @@ +use ethereum_types::H256 as Hash256; +use tree_hash::TreeHash; + +fn main() { + 
let n = 2048; + + let vec: Vec = (0..n).map(|_| Hash256::random()).collect(); + + vec.tree_hash_root(); +} diff --git a/eth2/utils/cached_tree_hash/src/btree_overlay.rs b/eth2/utils/cached_tree_hash/src/btree_overlay.rs new file mode 100644 index 000000000..a96df769c --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/btree_overlay.rs @@ -0,0 +1,340 @@ +use super::*; + +/// A schema defining a binary tree over a `TreeHashCache`. +/// +/// This structure is used for succinct storage, run-time functionality is gained by converting the +/// schema into a `BTreeOverlay`. +#[derive(Debug, PartialEq, Clone)] +pub struct BTreeSchema { + /// The depth of a schema defines how far it is nested within other fixed-length items. + /// + /// Each time a new variable-length object is created all items within it are assigned a depth + /// of `depth + 1`. + /// + /// When storing the schemas in a list, the depth parameter allows for removing all schemas + /// belonging to a specific variable-length item without removing schemas related to adjacent + /// variable-length items. + pub depth: usize, + lengths: Vec, +} + +impl BTreeSchema { + pub fn from_lengths(depth: usize, lengths: Vec) -> Self { + Self { depth, lengths } + } + + pub fn into_overlay(self, offset: usize) -> BTreeOverlay { + BTreeOverlay::from_schema(self, offset) + } +} + +impl Into for BTreeOverlay { + fn into(self) -> BTreeSchema { + BTreeSchema { + depth: self.depth, + lengths: self.lengths, + } + } +} + +/// Provides a status for some leaf-node in binary tree. +#[derive(Debug, PartialEq, Clone)] +pub enum LeafNode { + /// The leaf node does not exist in this tree. + DoesNotExist, + /// The leaf node exists in the tree and has a real value within the given `chunk` range. + Exists(Range), + /// The leaf node exists in the tree only as padding. + Padding, +} + +/// Instantiated from a `BTreeSchema`, allows for interpreting some chunks of a `TreeHashCache` as +/// a perfect binary tree. 
+/// +/// The primary purpose of this struct is to map from binary tree "nodes" to `TreeHashCache` +/// "chunks". Each tree has nodes `0..n` where `n` is the number of nodes and `0` is the root node. +/// Each of these nodes is mapped to a chunk, starting from `self.offset` and increasing in steps +/// of `1` for internal nodes and arbitrary steps for leaf-nodes. +#[derive(Debug, PartialEq, Clone)] +pub struct BTreeOverlay { + offset: usize, + /// See `BTreeSchema.depth` for a description. + pub depth: usize, + lengths: Vec, +} + +impl BTreeOverlay { + /// Instantiates a new instance for `item`, where its first chunk is `initial_offset` and has + /// the specified `depth`. + pub fn new(item: &T, initial_offset: usize, depth: usize) -> Self + where + T: CachedTreeHash, + { + Self::from_schema(item.tree_hash_cache_schema(depth), initial_offset) + } + + /// Instantiates a new instance from a schema, where its first chunk is `offset`. + pub fn from_schema(schema: BTreeSchema, offset: usize) -> Self { + Self { + offset, + depth: schema.depth, + lengths: schema.lengths, + } + } + + /// Returns a `LeafNode` for each of the `n` leaves of the tree. + /// + /// `LeafNode::DoesNotExist` is returned for each element `i` in `0..n` where `i >= + /// self.num_leaf_nodes()`. + pub fn get_leaf_nodes(&self, n: usize) -> Vec { + let mut running_offset = self.offset + self.num_internal_nodes(); + + let mut leaf_nodes: Vec = self + .lengths + .iter() + .map(|length| { + let range = running_offset..running_offset + length; + running_offset += length; + LeafNode::Exists(range) + }) + .collect(); + + leaf_nodes.resize(self.num_leaf_nodes(), LeafNode::Padding); + leaf_nodes.resize(n, LeafNode::DoesNotExist); + + leaf_nodes + } + + /// Returns the number of leaf nodes in the tree. + pub fn num_leaf_nodes(&self) -> usize { + self.lengths.len().next_power_of_two() + } + + /// Returns the number of leaves in the tree which are padding. 
+ pub fn num_padding_leaves(&self) -> usize { + self.num_leaf_nodes() - self.lengths.len() + } + + /// Returns the number of nodes in the tree. + /// + /// Note: this is distinct from `num_chunks`, which returns the total number of chunks in + /// this tree. + pub fn num_nodes(&self) -> usize { + 2 * self.num_leaf_nodes() - 1 + } + + /// Returns the number of internal (non-leaf) nodes in the tree. + pub fn num_internal_nodes(&self) -> usize { + self.num_leaf_nodes() - 1 + } + + /// Returns the chunk of the first node of the tree. + fn first_node(&self) -> usize { + self.offset + } + + /// Returns the root chunk of the tree (the zero-th node) + pub fn root(&self) -> usize { + self.first_node() + } + + /// Returns the first chunk outside of the boundary of this tree. It is the root node chunk + /// plus the total number of chunks in the tree. + pub fn next_node(&self) -> usize { + self.first_node() + self.num_internal_nodes() + self.num_leaf_nodes() - self.lengths.len() + + self.lengths.iter().sum::() + } + + /// Returns the height of the tree where a tree with a single node has a height of 1. + pub fn height(&self) -> usize { + self.num_leaf_nodes().trailing_zeros() as usize + } + + /// Returns the range of chunks that belong to the internal nodes of the tree. + pub fn internal_chunk_range(&self) -> Range { + self.offset..self.offset + self.num_internal_nodes() + } + + /// Returns all of the chunks that are encompassed by the tree. + pub fn chunk_range(&self) -> Range { + self.first_node()..self.next_node() + } + + /// Returns the number of chunks inside this tree (including subtrees). + /// + /// Note: this is distinct from `num_nodes` which returns the number of nodes in the binary + /// tree. + pub fn num_chunks(&self) -> usize { + self.next_node() - self.first_node() + } + + /// Returns the first chunk of the first leaf node in the tree. 
+ pub fn first_leaf_node(&self) -> usize { + self.offset + self.num_internal_nodes() + } + + /// Returns the chunks for some given parent node. + /// + /// Note: it is a parent _node_ not a parent _chunk_. + pub fn child_chunks(&self, parent: usize) -> (usize, usize) { + let children = children(parent); + + if children.1 < self.num_internal_nodes() { + (children.0 + self.offset, children.1 + self.offset) + } else { + let chunks = self.n_leaf_node_chunks(children.1); + (chunks[chunks.len() - 2], chunks[chunks.len() - 1]) + } + } + + /// Returns a vec of (parent_chunk, (left_child_chunk, right_child_chunk)). + pub fn internal_parents_and_children(&self) -> Vec<(usize, (usize, usize))> { + let mut chunks = Vec::with_capacity(self.num_nodes()); + chunks.append(&mut self.internal_node_chunks()); + chunks.append(&mut self.leaf_node_chunks()); + + (0..self.num_internal_nodes()) + .map(|parent| { + let children = children(parent); + (chunks[parent], (chunks[children.0], chunks[children.1])) + }) + .collect() + } + + /// Returns a vec of chunk indices for each internal node of the tree. + pub fn internal_node_chunks(&self) -> Vec { + (self.offset..self.offset + self.num_internal_nodes()).collect() + } + + /// Returns a vec of the first chunk for each leaf node of the tree. + pub fn leaf_node_chunks(&self) -> Vec { + self.n_leaf_node_chunks(self.num_leaf_nodes()) + } + + /// Returns a vec of the first chunk index for the first `n` leaf nodes of the tree. 
+ fn n_leaf_node_chunks(&self, n: usize) -> Vec { + let mut chunks = Vec::with_capacity(n); + + let mut chunk = self.offset + self.num_internal_nodes(); + for i in 0..n { + chunks.push(chunk); + + match self.lengths.get(i) { + Some(len) => { + chunk += len; + } + None => chunk += 1, + } + } + + chunks + } +} + +fn children(parent: usize) -> (usize, usize) { + ((2 * parent + 1), (2 * parent + 2)) +} + +#[cfg(test)] +mod test { + use super::*; + + fn get_tree_a(n: usize) -> BTreeOverlay { + BTreeSchema::from_lengths(0, vec![1; n]).into_overlay(0) + } + + #[test] + fn leaf_node_chunks() { + let tree = get_tree_a(4); + + assert_eq!(tree.leaf_node_chunks(), vec![3, 4, 5, 6]) + } + + #[test] + fn internal_node_chunks() { + let tree = get_tree_a(4); + + assert_eq!(tree.internal_node_chunks(), vec![0, 1, 2]) + } + + #[test] + fn internal_parents_and_children() { + let tree = get_tree_a(4); + + assert_eq!( + tree.internal_parents_and_children(), + vec![(0, (1, 2)), (1, (3, 4)), (2, (5, 6))] + ) + } + + #[test] + fn chunk_range() { + let tree = get_tree_a(4); + assert_eq!(tree.chunk_range(), 0..7); + + let tree = get_tree_a(1); + assert_eq!(tree.chunk_range(), 0..1); + + let tree = get_tree_a(2); + assert_eq!(tree.chunk_range(), 0..3); + + let tree = BTreeSchema::from_lengths(0, vec![1, 1]).into_overlay(11); + assert_eq!(tree.chunk_range(), 11..14); + + let tree = BTreeSchema::from_lengths(0, vec![7, 7, 7]).into_overlay(0); + assert_eq!(tree.chunk_range(), 0..25); + } + + #[test] + fn get_leaf_node() { + let tree = get_tree_a(4); + let leaves = tree.get_leaf_nodes(5); + + assert_eq!(leaves[0], LeafNode::Exists(3..4)); + assert_eq!(leaves[1], LeafNode::Exists(4..5)); + assert_eq!(leaves[2], LeafNode::Exists(5..6)); + assert_eq!(leaves[3], LeafNode::Exists(6..7)); + assert_eq!(leaves[4], LeafNode::DoesNotExist); + + let tree = get_tree_a(3); + let leaves = tree.get_leaf_nodes(5); + + assert_eq!(leaves[0], LeafNode::Exists(3..4)); + assert_eq!(leaves[1], 
LeafNode::Exists(4..5)); + assert_eq!(leaves[2], LeafNode::Exists(5..6)); + assert_eq!(leaves[3], LeafNode::Padding); + assert_eq!(leaves[4], LeafNode::DoesNotExist); + + let tree = get_tree_a(0); + let leaves = tree.get_leaf_nodes(2); + + assert_eq!(leaves[0], LeafNode::Padding); + assert_eq!(leaves[1], LeafNode::DoesNotExist); + + let tree = BTreeSchema::from_lengths(0, vec![3]).into_overlay(0); + let leaves = tree.get_leaf_nodes(2); + assert_eq!(leaves[0], LeafNode::Exists(0..3)); + assert_eq!(leaves[1], LeafNode::DoesNotExist); + + let tree = BTreeSchema::from_lengths(0, vec![3]).into_overlay(10); + let leaves = tree.get_leaf_nodes(2); + assert_eq!(leaves[0], LeafNode::Exists(10..13)); + assert_eq!(leaves[1], LeafNode::DoesNotExist); + } + + #[test] + fn root_of_one_node() { + let tree = get_tree_a(1); + + assert_eq!(tree.root(), 0); + assert_eq!(tree.num_internal_nodes(), 0); + assert_eq!(tree.num_leaf_nodes(), 1); + } + + #[test] + fn child_chunks() { + let tree = get_tree_a(4); + + assert_eq!(tree.child_chunks(0), (1, 2)) + } +} diff --git a/eth2/utils/cached_tree_hash/src/errors.rs b/eth2/utils/cached_tree_hash/src/errors.rs new file mode 100644 index 000000000..d9ac02913 --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/errors.rs @@ -0,0 +1,19 @@ +use tree_hash::TreeHashType; + +#[derive(Debug, PartialEq, Clone)] +pub enum Error { + ShouldNotProduceBTreeOverlay, + NoFirstNode, + NoBytesForRoot, + UnableToObtainSlices, + UnableToGrowMerkleTree, + UnableToShrinkMerkleTree, + TreeCannotHaveZeroNodes, + CacheNotInitialized, + ShouldNeverBePacked(TreeHashType), + BytesAreNotEvenChunks(usize), + NoModifiedFieldForChunk(usize), + NoBytesForChunk(usize), + NoSchemaForIndex(usize), + NotLeafNode(usize), +} diff --git a/eth2/utils/cached_tree_hash/src/impls.rs b/eth2/utils/cached_tree_hash/src/impls.rs new file mode 100644 index 000000000..5105ad6a7 --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/impls.rs @@ -0,0 +1,110 @@ +use super::*; +use 
crate::merkleize::merkleize; +use ethereum_types::H256; + +pub mod vec; + +macro_rules! impl_for_single_leaf_int { + ($type: ident) => { + impl CachedTreeHash for $type { + fn new_tree_hash_cache(&self, _depth: usize) -> Result { + Ok(TreeHashCache::from_bytes( + merkleize(self.to_le_bytes().to_vec()), + false, + None, + )?) + } + + fn tree_hash_cache_schema(&self, depth: usize) -> BTreeSchema { + BTreeSchema::from_lengths(depth, vec![1]) + } + + fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { + let leaf = merkleize(self.to_le_bytes().to_vec()); + cache.maybe_update_chunk(cache.chunk_index, &leaf)?; + + cache.chunk_index += 1; + + Ok(()) + } + } + }; +} + +impl_for_single_leaf_int!(u8); +impl_for_single_leaf_int!(u16); +impl_for_single_leaf_int!(u32); +impl_for_single_leaf_int!(u64); +impl_for_single_leaf_int!(usize); + +impl CachedTreeHash for bool { + fn new_tree_hash_cache(&self, _depth: usize) -> Result { + Ok(TreeHashCache::from_bytes( + merkleize((*self as u8).to_le_bytes().to_vec()), + false, + None, + )?) + } + + fn tree_hash_cache_schema(&self, depth: usize) -> BTreeSchema { + BTreeSchema::from_lengths(depth, vec![1]) + } + + fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { + let leaf = merkleize((*self as u8).to_le_bytes().to_vec()); + cache.maybe_update_chunk(cache.chunk_index, &leaf)?; + + cache.chunk_index += 1; + + Ok(()) + } +} + +impl CachedTreeHash for [u8; 4] { + fn new_tree_hash_cache(&self, _depth: usize) -> Result { + Ok(TreeHashCache::from_bytes( + merkleize(self.to_vec()), + false, + None, + )?) 
+ } + + fn tree_hash_cache_schema(&self, depth: usize) -> BTreeSchema { + BTreeSchema::from_lengths(depth, vec![1]) + } + + fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { + let leaf = merkleize(self.to_vec()); + cache.maybe_update_chunk(cache.chunk_index, &leaf)?; + + cache.chunk_index += 1; + + Ok(()) + } +} + +impl CachedTreeHash for H256 { + fn new_tree_hash_cache(&self, _depth: usize) -> Result { + Ok(TreeHashCache::from_bytes( + self.as_bytes().to_vec(), + false, + None, + )?) + } + + fn num_tree_hash_cache_chunks(&self) -> usize { + 1 + } + + fn tree_hash_cache_schema(&self, depth: usize) -> BTreeSchema { + BTreeSchema::from_lengths(depth, vec![1]) + } + + fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { + cache.maybe_update_chunk(cache.chunk_index, self.as_bytes())?; + + cache.chunk_index += 1; + + Ok(()) + } +} diff --git a/eth2/utils/cached_tree_hash/src/impls/vec.rs b/eth2/utils/cached_tree_hash/src/impls/vec.rs new file mode 100644 index 000000000..bdb7eb134 --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/impls/vec.rs @@ -0,0 +1,338 @@ +use super::*; +use crate::btree_overlay::LeafNode; +use crate::merkleize::{merkleize, num_sanitized_leaves, sanitise_bytes}; + +macro_rules! impl_for_list { + ($type: ty) => { + impl CachedTreeHash for $type + where + T: CachedTreeHash + TreeHash, + { + fn new_tree_hash_cache(&self, depth: usize) -> Result { + let (mut cache, schema) = new_tree_hash_cache(self, depth)?; + + cache.add_length_nodes(schema.into_overlay(0).chunk_range(), self.len())?; + + Ok(cache) + } + + fn num_tree_hash_cache_chunks(&self) -> usize { + // Add two extra nodes to cater for the node before and after to allow mixing-in length. 
+ BTreeOverlay::new(self, 0, 0).num_chunks() + 2 + } + + fn tree_hash_cache_schema(&self, depth: usize) -> BTreeSchema { + produce_schema(self, depth) + } + + fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { + // Skip the length-mixed-in root node. + cache.chunk_index += 1; + + // Update the cache, returning the new overlay. + let new_overlay = update_tree_hash_cache(&self, cache)?; + + // Mix in length + cache.mix_in_length(new_overlay.chunk_range(), self.len())?; + + // Skip an extra node to clear the length node. + cache.chunk_index += 1; + + Ok(()) + } + } + }; +} + +impl_for_list!(Vec); +impl_for_list!(&[T]); + +/// Build a new tree hash cache for some slice. +/// +/// Valid for both variable- and fixed-length slices. Does _not_ mix-in the length of the list, +/// the caller must do this. +pub fn new_tree_hash_cache( + vec: &[T], + depth: usize, +) -> Result<(TreeHashCache, BTreeSchema), Error> { + let schema = vec.tree_hash_cache_schema(depth); + + let cache = match T::tree_hash_type() { + TreeHashType::Basic => TreeHashCache::from_bytes( + merkleize(get_packed_leaves(vec)?), + false, + Some(schema.clone()), + ), + TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { + let subtrees = vec + .iter() + .map(|item| TreeHashCache::new_at_depth(item, depth + 1)) + .collect::, _>>()?; + + TreeHashCache::from_subtrees(&vec, subtrees, depth) + } + }?; + + Ok((cache, schema)) +} + +/// Produce a schema for some slice. +/// +/// Valid for both variable- and fixed-length slices. Does _not_ add the mix-in length nodes, the +/// caller must do this. +pub fn produce_schema(vec: &[T], depth: usize) -> BTreeSchema { + let lengths = match T::tree_hash_type() { + TreeHashType::Basic => { + // Ceil division. + let num_leaves = + (vec.len() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(); + + // Disallow zero-length as an empty list still has one all-padding node. 
+ vec![1; std::cmp::max(1, num_leaves)] + } + TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { + let mut lengths = vec![]; + + for item in vec { + lengths.push(item.num_tree_hash_cache_chunks()) + } + + lengths + } + }; + + BTreeSchema::from_lengths(depth, lengths) +} + +/// Updates the cache for some slice. +/// +/// Valid for both variable- and fixed-length slices. Does _not_ cater for the mix-in length nodes, +/// the caller must do this. +#[allow(clippy::range_plus_one)] // Minor readability lint requiring structural changes; not worth it. +pub fn update_tree_hash_cache( + vec: &[T], + cache: &mut TreeHashCache, +) -> Result { + let old_overlay = cache.get_overlay(cache.schema_index, cache.chunk_index)?; + let new_overlay = BTreeOverlay::new(&vec, cache.chunk_index, old_overlay.depth); + + cache.replace_overlay(cache.schema_index, cache.chunk_index, new_overlay.clone())?; + + cache.schema_index += 1; + + match T::tree_hash_type() { + TreeHashType::Basic => { + let mut buf = vec![0; HASHSIZE]; + let item_bytes = HASHSIZE / T::tree_hash_packing_factor(); + + // If the number of leaf nodes has changed, resize the cache. + if new_overlay.num_leaf_nodes() < old_overlay.num_leaf_nodes() { + let start = new_overlay.next_node(); + let end = start + (old_overlay.num_leaf_nodes() - new_overlay.num_leaf_nodes()); + + cache.splice(start..end, vec![], vec![]); + } else if new_overlay.num_leaf_nodes() > old_overlay.num_leaf_nodes() { + let start = old_overlay.next_node(); + let new_nodes = new_overlay.num_leaf_nodes() - old_overlay.num_leaf_nodes(); + + cache.splice( + start..start, + vec![0; new_nodes * HASHSIZE], + vec![true; new_nodes], + ); + } + + // Iterate through each of the leaf nodes in the new list. + for i in 0..new_overlay.num_leaf_nodes() { + // Iterate through the number of items that may be packing into the leaf node. 
+ for j in 0..T::tree_hash_packing_factor() { + // Create a mut slice that can be filled with either a serialized item or + // padding. + let buf_slice = &mut buf[j * item_bytes..(j + 1) * item_bytes]; + + // Attempt to get the item for this portion of the chunk. If it exists, + // update `buf` with its serialized bytes. If it doesn't exist, update + // `buf` with padding. + match vec.get(i * T::tree_hash_packing_factor() + j) { + Some(item) => { + buf_slice.copy_from_slice(&item.tree_hash_packed_encoding()); + } + None => buf_slice.copy_from_slice(&vec![0; item_bytes]), + } + } + + // Update the chunk if the generated `buf` is not the same as the cache. + let chunk = new_overlay.first_leaf_node() + i; + cache.maybe_update_chunk(chunk, &buf)?; + } + } + TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { + let longest_len = + std::cmp::max(new_overlay.num_leaf_nodes(), old_overlay.num_leaf_nodes()); + + let old_leaf_nodes = old_overlay.get_leaf_nodes(longest_len); + let new_leaf_nodes = if old_overlay == new_overlay { + old_leaf_nodes.clone() + } else { + new_overlay.get_leaf_nodes(longest_len) + }; + + for i in 0..longest_len { + match (&old_leaf_nodes[i], &new_leaf_nodes[i]) { + // The item existed in the previous list and exists in the current list. + // + // Update the item. + (LeafNode::Exists(_old), LeafNode::Exists(new)) => { + cache.chunk_index = new.start; + + vec[i].update_tree_hash_cache(cache)?; + } + // The list has been lengthened and this is a new item that did not exist in + // the previous list. + // + // Splice the tree for the new item into the current chunk_index. + (LeafNode::DoesNotExist, LeafNode::Exists(new)) => { + splice_in_new_tree( + &vec[i], + new.start..new.start, + new_overlay.depth + 1, + cache, + )?; + + cache.chunk_index = new.end; + } + // The list has been lengthened and this is a new item that was previously a + // padding item. + // + // Splice the tree for the new item over the padding chunk. 
+ (LeafNode::Padding, LeafNode::Exists(new)) => { + splice_in_new_tree( + &vec[i], + new.start..new.start + 1, + new_overlay.depth + 1, + cache, + )?; + + cache.chunk_index = new.end; + } + // The list has been shortened and this item was removed from the list and made + // into padding. + // + // Splice a padding node over the number of nodes the previous item occupied, + // starting at the current chunk_index. + (LeafNode::Exists(old), LeafNode::Padding) => { + let num_chunks = old.end - old.start; + + cache.splice( + cache.chunk_index..cache.chunk_index + num_chunks, + vec![0; HASHSIZE], + vec![true], + ); + + cache.chunk_index += 1; + } + // The list has been shortened and the item for this leaf existed in the + // previous list, but does not exist in this list. + // + // Remove the number of nodes the previous item occupied, starting at the + // current chunk_index. + (LeafNode::Exists(old), LeafNode::DoesNotExist) => { + let num_chunks = old.end - old.start; + + cache.splice( + cache.chunk_index..cache.chunk_index + num_chunks, + vec![], + vec![], + ); + } + // The list has been shortened and this leaf was padding in the previous list, + // however it should not exist in this list. + // + // Remove one node, starting at the current `chunk_index`. + (LeafNode::Padding, LeafNode::DoesNotExist) => { + cache.splice(cache.chunk_index..cache.chunk_index + 1, vec![], vec![]); + } + // The list has been lengthened and this leaf did not exist in the previous + // list, but should be padding for this list. + // + // Splice in a new padding node at the current chunk_index. + (LeafNode::DoesNotExist, LeafNode::Padding) => { + cache.splice( + cache.chunk_index..cache.chunk_index, + vec![0; HASHSIZE], + vec![true], + ); + + cache.chunk_index += 1; + } + // This leaf was padding in both lists, there's nothing to do. 
+ (LeafNode::Padding, LeafNode::Padding) => (), + // As we are looping through the larger of the lists of leaf nodes, it should + // be impossible for either leaf to be non-existant. + (LeafNode::DoesNotExist, LeafNode::DoesNotExist) => unreachable!(), + } + } + + // Clean out any excess schemas that may or may not be remaining if the list was + // shortened. + cache.remove_proceeding_child_schemas(cache.schema_index, new_overlay.depth); + } + } + + cache.update_internal_nodes(&new_overlay)?; + + cache.chunk_index = new_overlay.next_node(); + + Ok(new_overlay) +} + +/// Create a new `TreeHashCache` from `item` and splice it over the `chunks_to_replace` chunks of +/// the given `cache`. +/// +/// Useful for the case where a new element is added to a list. +/// +/// The schemas created for `item` will have the given `depth`. +fn splice_in_new_tree( + item: &T, + chunks_to_replace: Range, + depth: usize, + cache: &mut TreeHashCache, +) -> Result<(), Error> +where + T: CachedTreeHash, +{ + let (bytes, mut bools, schemas) = TreeHashCache::new_at_depth(item, depth)?.into_components(); + + // Record the number of schemas, this will be used later in the fn. + let num_schemas = schemas.len(); + + // Flag the root node of the new tree as dirty. + bools[0] = true; + + cache.splice(chunks_to_replace, bytes, bools); + cache + .schemas + .splice(cache.schema_index..cache.schema_index, schemas); + + cache.schema_index += num_schemas; + + Ok(()) +} + +/// Packs all of the leaves of `vec` into a single byte-array, appending `0` to ensure the number +/// of chunks in the byte-array is a power-of-two. 
+fn get_packed_leaves(vec: &[T]) -> Result, Error> +where + T: CachedTreeHash, +{ + let num_packed_bytes = (BYTES_PER_CHUNK / T::tree_hash_packing_factor()) * vec.len(); + let num_leaves = num_sanitized_leaves(num_packed_bytes); + + let mut packed = Vec::with_capacity(num_leaves * HASHSIZE); + + for item in vec { + packed.append(&mut item.tree_hash_packed_encoding()); + } + + Ok(sanitise_bytes(packed)) +} diff --git a/eth2/utils/cached_tree_hash/src/lib.rs b/eth2/utils/cached_tree_hash/src/lib.rs new file mode 100644 index 000000000..21fa786e4 --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/lib.rs @@ -0,0 +1,150 @@ +//! Performs cached merkle-hashing adhering to the Ethereum 2.0 specification defined +//! [here](https://github.com/ethereum/eth2.0-specs/blob/v0.5.1/specs/simple-serialize.md#merkleization). +//! +//! Caching allows for reduced hashing when some object has only been partially modified. This +//! allows for significant CPU-time savings (at the cost of additional storage). For example, +//! determining the root of a list of 1024 items with a single modification has been observed to +//! run in 1/25th of the time of a full merkle hash. +//! +//! +//! # Example: +//! +//! ``` +//! use cached_tree_hash::TreeHashCache; +//! use tree_hash_derive::{TreeHash, CachedTreeHash}; +//! +//! #[derive(TreeHash, CachedTreeHash)] +//! struct Foo { +//! bar: u64, +//! baz: Vec +//! } +//! +//! let mut foo = Foo { +//! bar: 1, +//! baz: vec![0, 1, 2] +//! }; +//! +//! let mut cache = TreeHashCache::new(&foo).unwrap(); +//! +//! foo.baz[1] = 0; +//! +//! cache.update(&foo).unwrap(); +//! +//! println!("Root is: {:?}", cache.tree_hash_root().unwrap()); +//! 
``` + +use hashing::hash; +use std::ops::Range; +use tree_hash::{TreeHash, TreeHashType, BYTES_PER_CHUNK, HASHSIZE}; + +mod btree_overlay; +mod errors; +mod impls; +pub mod merkleize; +mod resize; +mod tree_hash_cache; + +pub use btree_overlay::{BTreeOverlay, BTreeSchema}; +pub use errors::Error; +pub use impls::vec; +pub use tree_hash_cache::TreeHashCache; + +pub trait CachedTreeHash: TreeHash { + fn tree_hash_cache_schema(&self, depth: usize) -> BTreeSchema; + + fn num_tree_hash_cache_chunks(&self) -> usize { + self.tree_hash_cache_schema(0).into_overlay(0).num_chunks() + } + + fn new_tree_hash_cache(&self, depth: usize) -> Result; + + fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error>; +} + +/// Implements `CachedTreeHash` on `$type` as a fixed-length tree-hash vector of the ssz encoding +/// of `$type`. +#[macro_export] +macro_rules! cached_tree_hash_ssz_encoding_as_vector { + ($type: ident, $num_bytes: expr) => { + impl cached_tree_hash::CachedTreeHash for $type { + fn new_tree_hash_cache( + &self, + depth: usize, + ) -> Result { + let (cache, _schema) = + cached_tree_hash::vec::new_tree_hash_cache(&ssz::ssz_encode(self), depth)?; + + Ok(cache) + } + + fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { + let lengths = + vec![1; cached_tree_hash::merkleize::num_unsanitized_leaves($num_bytes)]; + cached_tree_hash::BTreeSchema::from_lengths(depth, lengths) + } + + fn update_tree_hash_cache( + &self, + cache: &mut cached_tree_hash::TreeHashCache, + ) -> Result<(), cached_tree_hash::Error> { + cached_tree_hash::vec::update_tree_hash_cache(&ssz::ssz_encode(self), cache)?; + + Ok(()) + } + } + }; +} + +/// Implements `CachedTreeHash` on `$type` as a variable-length tree-hash list of the result of +/// calling `.as_bytes()` on `$type`. +#[macro_export] +macro_rules! 
cached_tree_hash_bytes_as_list { + ($type: ident) => { + impl cached_tree_hash::CachedTreeHash for $type { + fn new_tree_hash_cache( + &self, + depth: usize, + ) -> Result { + let bytes = self.to_bytes(); + + let (mut cache, schema) = + cached_tree_hash::vec::new_tree_hash_cache(&bytes, depth)?; + + cache.add_length_nodes(schema.into_overlay(0).chunk_range(), bytes.len())?; + + Ok(cache) + } + + fn num_tree_hash_cache_chunks(&self) -> usize { + // Add two extra nodes to cater for the node before and after to allow mixing-in length. + cached_tree_hash::BTreeOverlay::new(self, 0, 0).num_chunks() + 2 + } + + fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { + let bytes = self.to_bytes(); + cached_tree_hash::vec::produce_schema(&bytes, depth) + } + + fn update_tree_hash_cache( + &self, + cache: &mut cached_tree_hash::TreeHashCache, + ) -> Result<(), cached_tree_hash::Error> { + let bytes = self.to_bytes(); + + // Skip the length-mixed-in root node. + cache.chunk_index += 1; + + // Update the cache, returning the new overlay. + let new_overlay = cached_tree_hash::vec::update_tree_hash_cache(&bytes, cache)?; + + // Mix in length + cache.mix_in_length(new_overlay.chunk_range(), bytes.len())?; + + // Skip an extra node to clear the length node. + cache.chunk_index += 1; + + Ok(()) + } + } + }; +} diff --git a/eth2/utils/cached_tree_hash/src/merkleize.rs b/eth2/utils/cached_tree_hash/src/merkleize.rs new file mode 100644 index 000000000..9d8c83200 --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/merkleize.rs @@ -0,0 +1,83 @@ +use hashing::hash; +use tree_hash::{BYTES_PER_CHUNK, HASHSIZE, MERKLE_HASH_CHUNK}; + +/// Split `values` into a power-of-two, identical-length chunks (padding with `0`) and merkleize +/// them, returning the entire merkle tree. +/// +/// The root hash is `merkleize(values)[0..BYTES_PER_CHUNK]`. 
+pub fn merkleize(values: Vec) -> Vec { + let values = sanitise_bytes(values); + + let leaves = values.len() / HASHSIZE; + + if leaves == 0 { + panic!("No full leaves"); + } + + if !leaves.is_power_of_two() { + panic!("leaves is not power of two"); + } + + let mut o: Vec = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE]; + o.append(&mut values.to_vec()); + + let mut i = o.len(); + let mut j = o.len() - values.len(); + + while i >= MERKLE_HASH_CHUNK { + i -= MERKLE_HASH_CHUNK; + let hash = hash(&o[i..i + MERKLE_HASH_CHUNK]); + + j -= HASHSIZE; + o[j..j + HASHSIZE].copy_from_slice(&hash); + } + + o +} + +/// Ensures that the given `bytes` are a power-of-two chunks, padding with zero if not. +pub fn sanitise_bytes(mut bytes: Vec) -> Vec { + let present_leaves = num_unsanitized_leaves(bytes.len()); + let required_leaves = present_leaves.next_power_of_two(); + + if (present_leaves != required_leaves) | last_leaf_needs_padding(bytes.len()) { + bytes.resize(num_bytes(required_leaves), 0); + } + + bytes +} + +/// Pads out `bytes` to ensure it is a clean `num_leaves` chunks. +pub fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { + let required_leaves = num_leaves.next_power_of_two(); + + bytes.resize( + bytes.len() + (required_leaves - num_leaves) * BYTES_PER_CHUNK, + 0, + ); +} + +fn last_leaf_needs_padding(num_bytes: usize) -> bool { + num_bytes % HASHSIZE != 0 +} + +/// Returns the number of leaves for a given `bytes_len` number of bytes, rounding up if +/// `num_bytes` is not a client multiple of chunk size. +pub fn num_unsanitized_leaves(bytes_len: usize) -> usize { + (bytes_len + HASHSIZE - 1) / HASHSIZE +} + +fn num_bytes(num_leaves: usize) -> usize { + num_leaves * HASHSIZE +} + +fn num_nodes(num_leaves: usize) -> usize { + 2 * num_leaves - 1 +} + +/// Returns the power-of-two number of leaves that would result from the given `bytes_len` number +/// of bytes. 
+pub fn num_sanitized_leaves(bytes_len: usize) -> usize { + let leaves = (bytes_len + HASHSIZE - 1) / HASHSIZE; + leaves.next_power_of_two() +} diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/resize.rs b/eth2/utils/cached_tree_hash/src/resize.rs similarity index 66% rename from eth2/utils/tree_hash/src/cached_tree_hash/resize.rs rename to eth2/utils/cached_tree_hash/src/resize.rs index 44b3f0ea5..5428e234b 100644 --- a/eth2/utils/tree_hash/src/cached_tree_hash/resize.rs +++ b/eth2/utils/cached_tree_hash/src/resize.rs @@ -1,55 +1,26 @@ +#![allow(clippy::range_plus_one)] // Minor readability lint requiring structural changes; not worth it. + use super::*; /// New vec is bigger than old vec. -pub fn grow_merkle_cache( +pub fn grow_merkle_tree( old_bytes: &[u8], old_flags: &[bool], from_height: usize, to_height: usize, ) -> Option<(Vec, Vec)> { - // Determine the size of our new tree. It is not just a simple `1 << to_height` as there can be - // an arbitrary number of nodes in `old_bytes` leaves if those leaves are subtrees. - let to_nodes = { - let old_nodes = old_bytes.len() / HASHSIZE; - let additional_nodes = old_nodes - nodes_in_tree_of_height(from_height); - nodes_in_tree_of_height(to_height) + additional_nodes - }; + let to_nodes = nodes_in_tree_of_height(to_height); let mut bytes = vec![0; to_nodes * HASHSIZE]; let mut flags = vec![true; to_nodes]; - let leaf_level = from_height; + for i in 0..=from_height { + let old_byte_slice = old_bytes.get(byte_range_at_height(i))?; + let old_flag_slice = old_flags.get(node_range_at_height(i))?; - for i in 0..=from_height as usize { - // If we're on the leaf slice, grab the first byte and all the of the bytes after that. - // This is required because we can have an arbitrary number of bytes at the leaf level - // (e.g., the case where there are subtrees as leaves). - // - // If we're not on a leaf level, the number of nodes is fixed and known. 
- let (old_byte_slice, old_flag_slice) = if i == leaf_level { - ( - old_bytes.get(first_byte_at_height(i)..)?, - old_flags.get(first_node_at_height(i)..)?, - ) - } else { - ( - old_bytes.get(byte_range_at_height(i))?, - old_flags.get(node_range_at_height(i))?, - ) - }; - - let new_i = i + to_height - from_height; - let (new_byte_slice, new_flag_slice) = if i == leaf_level { - ( - bytes.get_mut(first_byte_at_height(new_i)..)?, - flags.get_mut(first_node_at_height(new_i)..)?, - ) - } else { - ( - bytes.get_mut(byte_range_at_height(new_i))?, - flags.get_mut(node_range_at_height(new_i))?, - ) - }; + let offset = i + to_height - from_height; + let new_byte_slice = bytes.get_mut(byte_range_at_height(offset))?; + let new_flag_slice = flags.get_mut(node_range_at_height(offset))?; new_byte_slice .get_mut(0..old_byte_slice.len())? @@ -63,42 +34,24 @@ pub fn grow_merkle_cache( } /// New vec is smaller than old vec. -pub fn shrink_merkle_cache( +pub fn shrink_merkle_tree( from_bytes: &[u8], from_flags: &[bool], from_height: usize, to_height: usize, - to_nodes: usize, ) -> Option<(Vec, Vec)> { + let to_nodes = nodes_in_tree_of_height(to_height); + let mut bytes = vec![0; to_nodes * HASHSIZE]; let mut flags = vec![true; to_nodes]; for i in 0..=to_height as usize { - let from_i = i + from_height - to_height; + let offset = i + from_height - to_height; + let from_byte_slice = from_bytes.get(byte_range_at_height(offset))?; + let from_flag_slice = from_flags.get(node_range_at_height(offset))?; - let (from_byte_slice, from_flag_slice) = if from_i == from_height { - ( - from_bytes.get(first_byte_at_height(from_i)..)?, - from_flags.get(first_node_at_height(from_i)..)?, - ) - } else { - ( - from_bytes.get(byte_range_at_height(from_i))?, - from_flags.get(node_range_at_height(from_i))?, - ) - }; - - let (to_byte_slice, to_flag_slice) = if i == to_height { - ( - bytes.get_mut(first_byte_at_height(i)..)?, - flags.get_mut(first_node_at_height(i)..)?, - ) - } else { - ( - 
bytes.get_mut(byte_range_at_height(i))?, - flags.get_mut(node_range_at_height(i))?, - ) - }; + let to_byte_slice = bytes.get_mut(byte_range_at_height(i))?; + let to_flag_slice = flags.get_mut(node_range_at_height(i))?; to_byte_slice.copy_from_slice(from_byte_slice.get(0..to_byte_slice.len())?); to_flag_slice.copy_from_slice(from_flag_slice.get(0..to_flag_slice.len())?); @@ -107,7 +60,7 @@ pub fn shrink_merkle_cache( Some((bytes, flags)) } -fn nodes_in_tree_of_height(h: usize) -> usize { +pub fn nodes_in_tree_of_height(h: usize) -> usize { 2 * (1 << h) - 1 } @@ -120,10 +73,6 @@ fn node_range_at_height(h: usize) -> Range { first_node_at_height(h)..last_node_at_height(h) + 1 } -fn first_byte_at_height(h: usize) -> usize { - first_node_at_height(h) * HASHSIZE -} - fn first_node_at_height(h: usize) -> usize { (1 << h) - 1 } @@ -144,7 +93,7 @@ mod test { let original_bytes = vec![42; small * HASHSIZE]; let original_flags = vec![false; small]; - let (grown_bytes, grown_flags) = grow_merkle_cache( + let (grown_bytes, grown_flags) = grow_merkle_tree( &original_bytes, &original_flags, (small + 1).trailing_zeros() as usize - 1, @@ -192,12 +141,11 @@ mod test { assert_eq!(expected_bytes, grown_bytes); assert_eq!(expected_flags, grown_flags); - let (shrunk_bytes, shrunk_flags) = shrink_merkle_cache( + let (shrunk_bytes, shrunk_flags) = shrink_merkle_tree( &grown_bytes, &grown_flags, (big + 1).trailing_zeros() as usize - 1, (small + 1).trailing_zeros() as usize - 1, - small, ) .unwrap(); @@ -213,7 +161,7 @@ mod test { let original_bytes = vec![42; small * HASHSIZE]; let original_flags = vec![false; small]; - let (grown_bytes, grown_flags) = grow_merkle_cache( + let (grown_bytes, grown_flags) = grow_merkle_tree( &original_bytes, &original_flags, (small + 1).trailing_zeros() as usize - 1, @@ -261,12 +209,11 @@ mod test { assert_eq!(expected_bytes, grown_bytes); assert_eq!(expected_flags, grown_flags); - let (shrunk_bytes, shrunk_flags) = shrink_merkle_cache( + let (shrunk_bytes, 
shrunk_flags) = shrink_merkle_tree( &grown_bytes, &grown_flags, (big + 1).trailing_zeros() as usize - 1, (small + 1).trailing_zeros() as usize - 1, - small, ) .unwrap(); diff --git a/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs b/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs new file mode 100644 index 000000000..8f7b9de86 --- /dev/null +++ b/eth2/utils/cached_tree_hash/src/tree_hash_cache.rs @@ -0,0 +1,446 @@ +#![allow(clippy::range_plus_one)] // Minor readability lint requiring structural changes; not worth it. + +use super::*; +use crate::merkleize::{merkleize, pad_for_leaf_count}; +use int_to_bytes::int_to_bytes32; + +/// Provides cached tree hashing for some object implementing `CachedTreeHash`. +/// +/// Caching allows for doing minimal internal-node hashing when an object has only been partially +/// changed. +/// +/// See the crate root for an example. +#[derive(Debug, PartialEq, Clone)] +pub struct TreeHashCache { + /// Stores the binary-tree in 32-byte chunks. + pub bytes: Vec, + /// Maps to each chunk of `self.bytes`, indicating if the chunk is dirty. + pub chunk_modified: Vec, + /// Contains a schema for each variable-length item stored in the cache. + pub schemas: Vec, + + /// A counter used during updates. + pub chunk_index: usize, + /// A counter used during updates. + pub schema_index: usize, +} + +impl Default for TreeHashCache { + /// Create an empty cache. + /// + /// Note: an empty cache is effectively useless, an error will be raised if `self.update` is + /// called. + fn default() -> TreeHashCache { + TreeHashCache { + bytes: vec![], + chunk_modified: vec![], + schemas: vec![], + chunk_index: 0, + schema_index: 0, + } + } +} + +impl TreeHashCache { + /// Instantiates a new cache from `item` at a depth of `0`. + /// + /// The returned cache is fully-built and will return an accurate tree-hash root. 
+ pub fn new(item: &T) -> Result + where + T: CachedTreeHash, + { + Self::new_at_depth(item, 0) + } + + /// Instantiates a new cache from `item` at the specified `depth`. + /// + /// The returned cache is fully-built and will return an accurate tree-hash root. + pub fn new_at_depth(item: &T, depth: usize) -> Result + where + T: CachedTreeHash, + { + item.new_tree_hash_cache(depth) + } + + /// Updates the cache with `item`. + /// + /// `item` _must_ be of the same type as the `item` used to build the cache, otherwise an error + /// may be returned. + /// + /// After calling `update`, the cache will return an accurate tree-hash root using + /// `self.tree_hash_root()`. + pub fn update(&mut self, item: &T) -> Result<(), Error> + where + T: CachedTreeHash, + { + if self.is_empty() { + Err(Error::CacheNotInitialized) + } else { + self.reset_modifications(); + + item.update_tree_hash_cache(self) + } + } + + /// Builds a new cache for `item`, given `subtrees` contains a `Self` for field/item of `item`. + /// + /// Each `subtree` in `subtree` will become a leaf-node of the merkle-tree of `item`. + pub fn from_subtrees(item: &T, subtrees: Vec, depth: usize) -> Result + where + T: CachedTreeHash, + { + let overlay = BTreeOverlay::new(item, 0, depth); + + // Note how many leaves were provided. If is not a power-of-two, we'll need to pad it out + // later. + let num_provided_leaf_nodes = subtrees.len(); + + // Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill + // all the to-be-built internal nodes with zeros and append the leaves and subtrees. + let internal_node_bytes = overlay.num_internal_nodes() * BYTES_PER_CHUNK; + let subtrees_bytes = subtrees.iter().fold(0, |acc, t| acc + t.bytes.len()); + let mut bytes = Vec::with_capacity(subtrees_bytes + internal_node_bytes); + bytes.resize(internal_node_bytes, 0); + + // Allocate enough bytes to store all the leaves. 
+ let mut leaves = Vec::with_capacity(overlay.num_leaf_nodes() * HASHSIZE); + let mut schemas = Vec::with_capacity(subtrees.len()); + + if T::tree_hash_type() == TreeHashType::List { + schemas.push(overlay.into()); + } + + // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then + // concatenating their merkle trees. + for t in subtrees { + leaves.append(&mut t.tree_hash_root()?.to_vec()); + + let (mut t_bytes, _bools, mut t_schemas) = t.into_components(); + bytes.append(&mut t_bytes); + schemas.append(&mut t_schemas); + } + + // Pad the leaves to an even power-of-two, using zeros. + pad_for_leaf_count(num_provided_leaf_nodes, &mut bytes); + + // Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros + // internal nodes created earlier with the internal nodes generated by `merkleize`. + let mut merkleized = merkleize(leaves); + merkleized.split_off(internal_node_bytes); + bytes.splice(0..internal_node_bytes, merkleized); + + Ok(Self { + chunk_modified: vec![true; bytes.len() / BYTES_PER_CHUNK], + bytes, + schemas, + chunk_index: 0, + schema_index: 0, + }) + } + + /// Instantiate a new cache from the pre-built `bytes` where each `self.chunk_modified` will be + /// set to `intitial_modified_state`. + /// + /// Note: `bytes.len()` must be a multiple of 32 + pub fn from_bytes( + bytes: Vec, + initial_modified_state: bool, + schema: Option, + ) -> Result { + if bytes.len() % BYTES_PER_CHUNK > 0 { + return Err(Error::BytesAreNotEvenChunks(bytes.len())); + } + + let schemas = match schema { + Some(schema) => vec![schema], + None => vec![], + }; + + Ok(Self { + chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK], + bytes, + schemas, + chunk_index: 0, + schema_index: 0, + }) + } + + /// Returns `true` if this cache is empty (i.e., it has never been built for some item). + /// + /// Note: an empty cache is effectively useless, an error will be raised if `self.update` is + /// called. 
+ pub fn is_empty(&self) -> bool { + self.chunk_modified.is_empty() + } + + /// Return an overlay, built from the schema at `schema_index` with an offset of `chunk_index`. + pub fn get_overlay( + &self, + schema_index: usize, + chunk_index: usize, + ) -> Result { + Ok(self + .schemas + .get(schema_index) + .ok_or_else(|| Error::NoSchemaForIndex(schema_index))? + .clone() + .into_overlay(chunk_index)) + } + + /// Resets the per-update counters, allowing a new update to start. + /// + /// Note: this does _not_ delete the contents of the cache. + pub fn reset_modifications(&mut self) { + // Reset the per-hash counters. + self.chunk_index = 0; + self.schema_index = 0; + + for chunk_modified in &mut self.chunk_modified { + *chunk_modified = false; + } + } + + /// Replace the schema at `schema_index` with the schema derived from `new_overlay`. + /// + /// If the `new_overlay` schema has a different number of internal nodes to the schema at + /// `schema_index`, the cache will be updated to add/remove these new internal nodes. + pub fn replace_overlay( + &mut self, + schema_index: usize, + // TODO: remove chunk index (if possible) + chunk_index: usize, + new_overlay: BTreeOverlay, + ) -> Result { + let old_overlay = self.get_overlay(schema_index, chunk_index)?; + // If the merkle tree required to represent the new list is of a different size to the one + // required for the previous list, then update the internal nodes. + // + // Leaf nodes are not touched, they should be updated externally to this function. + // + // This grows/shrinks the bytes to accommodate the new tree, preserving as much of the tree + // as possible. + if new_overlay.num_internal_nodes() != old_overlay.num_internal_nodes() { + // Get slices of the existing tree from the cache. 
+ let (old_bytes, old_flags) = self + .slices(old_overlay.internal_chunk_range()) + .ok_or_else(|| Error::UnableToObtainSlices)?; + + let (new_bytes, new_flags) = if new_overlay.num_internal_nodes() == 0 { + // The new tree has zero internal nodes, simply return empty lists. + (vec![], vec![]) + } else if old_overlay.num_internal_nodes() == 0 { + // The old tree has zero nodes and the new tree has some nodes. Create new nodes to + // suit. + let nodes = resize::nodes_in_tree_of_height(new_overlay.height() - 1); + + (vec![0; nodes * HASHSIZE], vec![true; nodes]) + } else if new_overlay.num_internal_nodes() > old_overlay.num_internal_nodes() { + // The new tree is bigger than the old tree. + // + // Grow the internal nodes, preserving any existing nodes. + resize::grow_merkle_tree( + old_bytes, + old_flags, + old_overlay.height() - 1, + new_overlay.height() - 1, + ) + .ok_or_else(|| Error::UnableToGrowMerkleTree)? + } else { + // The new tree is smaller than the old tree. + // + // Shrink the internal nodes, preserving any existing nodes. + resize::shrink_merkle_tree( + old_bytes, + old_flags, + old_overlay.height() - 1, + new_overlay.height() - 1, + ) + .ok_or_else(|| Error::UnableToShrinkMerkleTree)? + }; + + // Splice the resized created elements over the existing elements, effectively updating + // the number of stored internal nodes for this tree. + self.splice(old_overlay.internal_chunk_range(), new_bytes, new_flags); + } + + let old_schema = std::mem::replace(&mut self.schemas[schema_index], new_overlay.into()); + + Ok(old_schema.into_overlay(chunk_index)) + } + + /// Remove all of the child schemas following `schema_index`. + /// + /// Schema `a` is a child of schema `b` if `a.depth < b.depth`. 
+ pub fn remove_proceeding_child_schemas(&mut self, schema_index: usize, depth: usize) { + let end = self + .schemas + .iter() + .skip(schema_index) + .position(|o| o.depth <= depth) + .and_then(|i| Some(i + schema_index)) + .unwrap_or_else(|| self.schemas.len()); + + self.schemas.splice(schema_index..end, vec![]); + } + + /// Iterate through the internal nodes chunks of `overlay`, updating the chunk with the + /// merkle-root of it's children if either of those children are dirty. + pub fn update_internal_nodes(&mut self, overlay: &BTreeOverlay) -> Result<(), Error> { + for (parent, children) in overlay.internal_parents_and_children().into_iter().rev() { + if self.either_modified(children)? { + self.modify_chunk(parent, &self.hash_children(children)?)?; + } + } + + Ok(()) + } + + /// Returns to the tree-hash root of the cache. + pub fn tree_hash_root(&self) -> Result<&[u8], Error> { + if self.is_empty() { + Err(Error::CacheNotInitialized) + } else { + self.bytes + .get(0..HASHSIZE) + .ok_or_else(|| Error::NoBytesForRoot) + } + } + + /// Splices the given `bytes` over `self.bytes` and `bools` over `self.chunk_modified` at the + /// specified `chunk_range`. + pub fn splice(&mut self, chunk_range: Range, bytes: Vec, bools: Vec) { + // Update the `chunk_modified` vec, marking all spliced-in nodes as changed. + self.chunk_modified.splice(chunk_range.clone(), bools); + self.bytes + .splice(node_range_to_byte_range(&chunk_range), bytes); + } + + /// If the bytes at `chunk` are not the same as `to`, `self.bytes` is updated and + /// `self.chunk_modified` is set to `true`. + pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { + let start = chunk * BYTES_PER_CHUNK; + let end = start + BYTES_PER_CHUNK; + + if !self.chunk_equals(chunk, to)? { + self.bytes + .get_mut(start..end) + .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))? 
+ .copy_from_slice(to); + self.chunk_modified[chunk] = true; + } + + Ok(()) + } + + /// Returns the slices of `self.bytes` and `self.chunk_modified` at the given `chunk_range`. + fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { + Some(( + self.bytes.get(node_range_to_byte_range(&chunk_range))?, + self.chunk_modified.get(chunk_range)?, + )) + } + + /// Updates `self.bytes` at `chunk` and sets `self.chunk_modified` for the `chunk` to `true`. + pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { + let start = chunk * BYTES_PER_CHUNK; + let end = start + BYTES_PER_CHUNK; + + self.bytes + .get_mut(start..end) + .ok_or_else(|| Error::NoBytesForChunk(chunk))? + .copy_from_slice(to); + + self.chunk_modified[chunk] = true; + + Ok(()) + } + + /// Returns the bytes at `chunk`. + fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { + let start = chunk * BYTES_PER_CHUNK; + let end = start + BYTES_PER_CHUNK; + + Ok(self + .bytes + .get(start..end) + .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?) + } + + /// Returns `true` if the bytes at `chunk` are equal to `other`. + fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { + Ok(self.get_chunk(chunk)? == other) + } + + /// Returns `true` if `chunk` is dirty. + pub fn changed(&self, chunk: usize) -> Result { + self.chunk_modified + .get(chunk) + .cloned() + .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk)) + } + + /// Returns `true` if either of the `children` chunks is dirty. + fn either_modified(&self, children: (usize, usize)) -> Result { + Ok(self.changed(children.0)? | self.changed(children.1)?) + } + + /// Returns the hash of the concatenation of the given `children`. 
+ pub fn hash_children(&self, children: (usize, usize)) -> Result, Error> { + let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2); + child_bytes.append(&mut self.get_chunk(children.0)?.to_vec()); + child_bytes.append(&mut self.get_chunk(children.1)?.to_vec()); + + Ok(hash(&child_bytes)) + } + + /// Adds a chunk before and after the given `chunk` range and calls `self.mix_in_length()`. + pub fn add_length_nodes( + &mut self, + chunk_range: Range, + length: usize, + ) -> Result<(), Error> { + self.chunk_modified[chunk_range.start] = true; + + let byte_range = node_range_to_byte_range(&chunk_range); + + // Add the last node. + self.bytes + .splice(byte_range.end..byte_range.end, vec![0; HASHSIZE]); + self.chunk_modified + .splice(chunk_range.end..chunk_range.end, vec![false]); + + // Add the first node. + self.bytes + .splice(byte_range.start..byte_range.start, vec![0; HASHSIZE]); + self.chunk_modified + .splice(chunk_range.start..chunk_range.start, vec![false]); + + self.mix_in_length(chunk_range.start + 1..chunk_range.end + 1, length)?; + + Ok(()) + } + + /// Sets `chunk_range.end + 1` equal to the little-endian serialization of `length`. Sets + /// `chunk_range.start - 1` equal to `self.hash_children(chunk_range.start, chunk_range.end + 1)`. + pub fn mix_in_length(&mut self, chunk_range: Range, length: usize) -> Result<(), Error> { + // Update the length chunk. + self.maybe_update_chunk(chunk_range.end, &int_to_bytes32(length as u64))?; + + // Update the mixed-in root if the main root or the length have changed. + let children = (chunk_range.start, chunk_range.end); + if self.either_modified(children)? { + self.modify_chunk(chunk_range.start - 1, &self.hash_children(children)?)?; + } + + Ok(()) + } + + /// Returns `(self.bytes, self.chunk_modified, self.schemas)`. 
+ pub fn into_components(self) -> (Vec, Vec, Vec) { + (self.bytes, self.chunk_modified, self.schemas) + } +} + +fn node_range_to_byte_range(node_range: &Range) -> Range { + node_range.start * HASHSIZE..node_range.end * HASHSIZE +} diff --git a/eth2/utils/cached_tree_hash/tests/tests.rs b/eth2/utils/cached_tree_hash/tests/tests.rs new file mode 100644 index 000000000..3e2598e2b --- /dev/null +++ b/eth2/utils/cached_tree_hash/tests/tests.rs @@ -0,0 +1,677 @@ +use cached_tree_hash::{merkleize::merkleize, *}; +use ethereum_types::H256 as Hash256; +use int_to_bytes::int_to_bytes32; +use tree_hash_derive::{CachedTreeHash, TreeHash}; + +#[test] +fn modifications() { + let n = 2048; + + let vec: Vec = (0..n).map(|_| Hash256::random()).collect(); + + let mut cache = TreeHashCache::new(&vec).unwrap(); + cache.update(&vec).unwrap(); + + let modifications = cache.chunk_modified.iter().filter(|b| **b).count(); + + assert_eq!(modifications, 0); + + let mut modified_vec = vec.clone(); + modified_vec[n - 1] = Hash256::random(); + + cache.update(&modified_vec).unwrap(); + + let modifications = cache.chunk_modified.iter().filter(|b| **b).count(); + + assert_eq!(modifications, n.trailing_zeros() as usize + 2); +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct NestedStruct { + pub a: u64, + pub b: Inner, +} + +fn test_routine(original: T, modified: Vec) +where + T: CachedTreeHash + std::fmt::Debug, +{ + let mut cache = TreeHashCache::new(&original).unwrap(); + + let standard_root = original.tree_hash_root(); + let cached_root = cache.tree_hash_root().unwrap(); + assert_eq!(standard_root, cached_root, "Initial cache build failed."); + + for (i, modified) in modified.iter().enumerate() { + println!("-- Start of modification {} --", i); + + // Update the existing hasher. + cache + .update(modified) + .expect(&format!("Modification {}", i)); + + // Create a new hasher from the "modified" struct. 
+ let modified_cache = TreeHashCache::new(modified).unwrap(); + + assert_eq!( + cache.chunk_modified.len(), + modified_cache.chunk_modified.len(), + "Number of chunks is different" + ); + + assert_eq!( + cache.bytes.len(), + modified_cache.bytes.len(), + "Number of bytes is different" + ); + + assert_eq!(cache.bytes, modified_cache.bytes, "Bytes are different"); + + assert_eq!( + cache.schemas.len(), + modified_cache.schemas.len(), + "Number of schemas is different" + ); + + assert_eq!( + cache.schemas, modified_cache.schemas, + "Schemas are different" + ); + + // Test the root generated by the updated hasher matches a non-cached tree hash root. + let standard_root = modified.tree_hash_root(); + let cached_root = cache + .tree_hash_root() + .expect(&format!("Modification {}", i)); + assert_eq!( + standard_root, cached_root, + "Modification {} failed. \n Cache: {:?}", + i, cache + ); + } +} + +#[test] +fn test_nested_struct() { + let original = NestedStruct { + a: 42, + b: Inner { + a: 12, + b: 13, + c: 14, + d: 15, + }, + }; + let modified = vec![NestedStruct { + a: 99, + ..original.clone() + }]; + + test_routine(original, modified); +} + +#[test] +fn test_inner() { + let original = Inner { + a: 12, + b: 13, + c: 14, + d: 15, + }; + + let modified = vec![Inner { + a: 99, + ..original.clone() + }]; + + test_routine(original, modified); +} + +#[test] +fn test_vec_of_hash256() { + let n = 16; + + let original: Vec = (0..n).map(|_| Hash256::random()).collect(); + + let modified: Vec> = vec![ + original[..].to_vec(), + original[0..n / 2].to_vec(), + vec![], + original[0..1].to_vec(), + original[0..3].to_vec(), + original[0..n - 12].to_vec(), + ]; + + test_routine(original, modified); +} + +#[test] +fn test_vec_of_u64() { + let original: Vec = vec![1, 2, 3, 4, 5]; + + let modified: Vec> = vec![ + vec![1, 2, 3, 4, 42], + vec![1, 2, 3, 4], + vec![], + vec![42; 2_usize.pow(4)], + vec![], + vec![], + vec![1, 2, 3, 4, 42], + vec![1, 2, 3], + vec![1], + ]; + + 
test_routine(original, modified); +} + +#[test] +fn test_nested_list_of_u64() { + let original: Vec> = vec![vec![42]]; + + let modified = vec![ + vec![vec![1]], + vec![vec![1], vec![2]], + vec![vec![1], vec![3], vec![4]], + vec![], + vec![vec![1], vec![3], vec![4]], + vec![], + vec![vec![1, 2], vec![3], vec![4, 5, 6, 7, 8]], + vec![], + vec![vec![1], vec![2], vec![3]], + vec![vec![1, 2, 3, 4, 5, 6], vec![1, 2, 3, 4, 5, 6, 7]], + vec![vec![], vec![], vec![]], + vec![vec![0, 0, 0], vec![0], vec![0]], + ]; + + test_routine(original, modified); +} + +#[test] +fn test_shrinking_vec_of_vec() { + let original: Vec> = vec![vec![1], vec![2], vec![3], vec![4], vec![5]]; + let modified: Vec> = original[0..3].to_vec(); + + let new_cache = TreeHashCache::new(&modified).unwrap(); + + let mut modified_cache = TreeHashCache::new(&original).unwrap(); + modified_cache.update(&modified).unwrap(); + + assert_eq!( + new_cache.schemas.len(), + modified_cache.schemas.len(), + "Schema count is different" + ); + + assert_eq!( + new_cache.chunk_modified.len(), + modified_cache.chunk_modified.len(), + "Chunk count is different" + ); +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct StructWithVec { + pub a: u64, + pub b: Inner, + pub c: Vec, +} + +#[test] +fn test_struct_with_vec() { + let original = StructWithVec { + a: 42, + b: Inner { + a: 12, + b: 13, + c: 14, + d: 15, + }, + c: vec![1, 2, 3, 4, 5], + }; + + let modified = vec![ + StructWithVec { + a: 99, + ..original.clone() + }, + StructWithVec { + a: 100, + ..original.clone() + }, + StructWithVec { + c: vec![1, 2, 3, 4, 5], + ..original.clone() + }, + StructWithVec { + c: vec![1, 3, 4, 5, 6], + ..original.clone() + }, + StructWithVec { + c: vec![1, 3, 4, 5, 6, 7, 8, 9], + ..original.clone() + }, + StructWithVec { + c: vec![1, 3, 4, 5], + ..original.clone() + }, + StructWithVec { + b: Inner { + a: u64::max_value(), + b: u64::max_value(), + c: u64::max_value(), + d: u64::max_value(), + }, + c: vec![], + 
..original.clone() + }, + StructWithVec { + b: Inner { + a: 0, + b: 1, + c: 2, + d: 3, + }, + ..original.clone() + }, + ]; + + test_routine(original, modified); +} + +#[test] +fn test_vec_of_struct_with_vec() { + let a = StructWithVec { + a: 42, + b: Inner { + a: 12, + b: 13, + c: 14, + d: 15, + }, + c: vec![1, 2, 3, 4, 5], + }; + let b = StructWithVec { + c: vec![], + ..a.clone() + }; + let c = StructWithVec { + b: Inner { + a: 99, + b: 100, + c: 101, + d: 102, + }, + ..a.clone() + }; + let d = StructWithVec { a: 0, ..a.clone() }; + + let original: Vec = vec![a.clone(), c.clone()]; + + let modified = vec![ + vec![a.clone(), c.clone()], + vec![], + vec![a.clone(), b.clone(), c.clone(), d.clone()], + vec![b.clone(), a.clone(), c.clone(), d.clone()], + vec![], + vec![a.clone()], + vec![], + vec![a.clone(), b.clone(), c.clone(), d.clone()], + ]; + + test_routine(original, modified); +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct StructWithVecOfStructs { + pub a: u64, + pub b: Inner, + pub c: Vec, +} + +fn get_inners() -> Vec { + vec![ + Inner { + a: 12, + b: 13, + c: 14, + d: 15, + }, + Inner { + a: 99, + b: 100, + c: 101, + d: 102, + }, + Inner { + a: 255, + b: 256, + c: 257, + d: 0, + }, + Inner { + a: 1000, + b: 2000, + c: 3000, + d: 0, + }, + Inner { + a: 0, + b: 0, + c: 0, + d: 0, + }, + ] +} + +fn get_struct_with_vec_of_structs() -> Vec { + let inner_a = Inner { + a: 12, + b: 13, + c: 14, + d: 15, + }; + + let inner_b = Inner { + a: 99, + b: 100, + c: 101, + d: 102, + }; + + let inner_c = Inner { + a: 255, + b: 256, + c: 257, + d: 0, + }; + + let a = StructWithVecOfStructs { + a: 42, + b: inner_a.clone(), + c: vec![inner_a.clone(), inner_b.clone(), inner_c.clone()], + }; + + let b = StructWithVecOfStructs { + c: vec![], + ..a.clone() + }; + + let c = StructWithVecOfStructs { + a: 800, + ..a.clone() + }; + + let d = StructWithVecOfStructs { + b: inner_c.clone(), + ..a.clone() + }; + + let e = StructWithVecOfStructs { + c: 
vec![inner_a.clone(), inner_b.clone()], + ..a.clone() + }; + + let f = StructWithVecOfStructs { + c: vec![inner_a.clone()], + ..a.clone() + }; + + vec![a, b, c, d, e, f] +} + +#[test] +fn test_struct_with_vec_of_structs() { + let variants = get_struct_with_vec_of_structs(); + + test_routine(variants[0].clone(), variants.clone()); + test_routine(variants[1].clone(), variants.clone()); + test_routine(variants[2].clone(), variants.clone()); + test_routine(variants[3].clone(), variants.clone()); + test_routine(variants[4].clone(), variants.clone()); + test_routine(variants[5].clone(), variants.clone()); +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct StructWithVecOfStructWithVecOfStructs { + pub a: Vec, + pub b: u64, +} + +#[test] +fn test_struct_with_vec_of_struct_with_vec_of_structs() { + let structs = get_struct_with_vec_of_structs(); + + let variants = vec![ + StructWithVecOfStructWithVecOfStructs { + a: structs[..].to_vec(), + b: 99, + }, + StructWithVecOfStructWithVecOfStructs { a: vec![], b: 99 }, + StructWithVecOfStructWithVecOfStructs { + a: structs[0..2].to_vec(), + b: 99, + }, + StructWithVecOfStructWithVecOfStructs { + a: structs[0..2].to_vec(), + b: 100, + }, + StructWithVecOfStructWithVecOfStructs { + a: structs[0..1].to_vec(), + b: 100, + }, + StructWithVecOfStructWithVecOfStructs { + a: structs[0..4].to_vec(), + b: 100, + }, + StructWithVecOfStructWithVecOfStructs { + a: structs[0..5].to_vec(), + b: 8, + }, + ]; + + for v in &variants { + test_routine(v.clone(), variants.clone()); + } +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct StructWithTwoVecs { + pub a: Vec, + pub b: Vec, +} + +fn get_struct_with_two_vecs() -> Vec { + let inners = get_inners(); + + vec![ + StructWithTwoVecs { + a: inners[..].to_vec(), + b: inners[..].to_vec(), + }, + StructWithTwoVecs { + a: inners[0..1].to_vec(), + b: inners[..].to_vec(), + }, + StructWithTwoVecs { + a: inners[0..1].to_vec(), + b: inners[0..2].to_vec(), + }, + 
StructWithTwoVecs { + a: inners[0..4].to_vec(), + b: inners[0..2].to_vec(), + }, + StructWithTwoVecs { + a: vec![], + b: inners[..].to_vec(), + }, + StructWithTwoVecs { + a: inners[..].to_vec(), + b: vec![], + }, + StructWithTwoVecs { + a: inners[0..3].to_vec(), + b: inners[0..1].to_vec(), + }, + ] +} + +#[test] +fn test_struct_with_two_vecs() { + let variants = get_struct_with_two_vecs(); + + for v in &variants { + test_routine(v.clone(), variants.clone()); + } +} + +#[test] +fn test_vec_of_struct_with_two_vecs() { + let structs = get_struct_with_two_vecs(); + + let variants = vec![ + structs[0..].to_vec(), + structs[0..2].to_vec(), + structs[2..3].to_vec(), + vec![], + structs[2..4].to_vec(), + ]; + + test_routine(variants[0].clone(), vec![variants[2].clone()]); + + for v in &variants { + test_routine(v.clone(), variants.clone()); + } +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct U64AndTwoStructs { + pub a: u64, + pub b: Inner, + pub c: Inner, +} + +#[test] +fn test_u64_and_two_structs() { + let inners = get_inners(); + + let variants = vec![ + U64AndTwoStructs { + a: 99, + b: inners[0].clone(), + c: inners[1].clone(), + }, + U64AndTwoStructs { + a: 10, + b: inners[2].clone(), + c: inners[3].clone(), + }, + U64AndTwoStructs { + a: 0, + b: inners[1].clone(), + c: inners[1].clone(), + }, + U64AndTwoStructs { + a: 0, + b: inners[1].clone(), + c: inners[1].clone(), + }, + ]; + + for v in &variants { + test_routine(v.clone(), variants.clone()); + } +} + +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] +pub struct Inner { + pub a: u64, + pub b: u64, + pub c: u64, + pub d: u64, +} + +fn generic_test(index: usize) { + let inner = Inner { + a: 1, + b: 2, + c: 3, + d: 4, + }; + + let mut cache = TreeHashCache::new(&inner).unwrap(); + + let changed_inner = match index { + 0 => Inner { + a: 42, + ..inner.clone() + }, + 1 => Inner { + b: 42, + ..inner.clone() + }, + 2 => Inner { + c: 42, + ..inner.clone() + }, + 3 => Inner { + d: 42, + ..inner.clone() 
+ }, + _ => panic!("bad index"), + }; + + changed_inner.update_tree_hash_cache(&mut cache).unwrap(); + + let data1 = int_to_bytes32(1); + let data2 = int_to_bytes32(2); + let data3 = int_to_bytes32(3); + let data4 = int_to_bytes32(4); + + let mut data = vec![data1, data2, data3, data4]; + + data[index] = int_to_bytes32(42); + + let expected = merkleize(join(data)); + + let (cache_bytes, _, _) = cache.into_components(); + + assert_eq!(expected, cache_bytes); +} + +#[test] +fn cached_hash_on_inner() { + generic_test(0); + generic_test(1); + generic_test(2); + generic_test(3); +} + +#[test] +fn inner_builds() { + let data1 = int_to_bytes32(1); + let data2 = int_to_bytes32(2); + let data3 = int_to_bytes32(3); + let data4 = int_to_bytes32(4); + + let data = join(vec![data1, data2, data3, data4]); + let expected = merkleize(data); + + let inner = Inner { + a: 1, + b: 2, + c: 3, + d: 4, + }; + + let (cache_bytes, _, _) = TreeHashCache::new(&inner).unwrap().into_components(); + + assert_eq!(expected, cache_bytes); +} + +fn join(many: Vec>) -> Vec { + let mut all = vec![]; + for one in many { + all.extend_from_slice(&mut one.clone()) + } + all +} diff --git a/eth2/utils/tree_hash/Cargo.toml b/eth2/utils/tree_hash/Cargo.toml index 328d91577..7e23d2165 100644 --- a/eth2/utils/tree_hash/Cargo.toml +++ b/eth2/utils/tree_hash/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["Paul Hauner "] edition = "2018" +[dev-dependencies] +tree_hash_derive = { path = "../tree_hash_derive" } + [dependencies] ethereum-types = "0.5" hashing = { path = "../hashing" } diff --git a/eth2/utils/tree_hash/README.md b/eth2/utils/tree_hash/README.md new file mode 100644 index 000000000..0498bfc3e --- /dev/null +++ b/eth2/utils/tree_hash/README.md @@ -0,0 +1,76 @@ +# Tree hashing + +Provides both cached and non-cached tree hashing methods. 
+ +## Standard Tree Hash + +```rust +use tree_hash_derive::TreeHash; + +#[derive(TreeHash)] +struct Foo { + a: u64, + b: Vec, +} + +fn main() { + let foo = Foo { + a: 42, + b: vec![1, 2, 3] + }; + + println!("root: {}", foo.tree_hash_root()); +} +``` + +## Cached Tree Hash + + +```rust +use tree_hash_derive::{TreeHash, CachedTreeHash}; + +#[derive(TreeHash, CachedTreeHash)] +struct Foo { + a: u64, + b: Vec, +} + +#[derive(TreeHash, CachedTreeHash)] +struct Bar { + a: Vec, + b: u64, +} + +fn main() { + let bar = Bar { + a: vec![ + Foo { + a: 42, + b: vec![1, 2, 3] + } + ], + b: 42 + }; + + let modified_bar = Bar { + a: vec![ + Foo { + a: 100, + b: vec![1, 2, 3, 4, 5, 6] + }, + Foo { + a: 42, + b: vec![] + } + ], + b: 99 + }; + + + let mut hasher = CachedTreeHasher::new(&bar).unwrap(); + hasher.update(&modified_bar).unwrap(); + + // Assert that the cached tree hash matches a standard tree hash. + assert_eq!(hasher.tree_hash_root(), modified_bar.tree_hash_root()); +} +``` diff --git a/eth2/utils/tree_hash/src/cached_tree_hash.rs b/eth2/utils/tree_hash/src/cached_tree_hash.rs deleted file mode 100644 index e093b2dd7..000000000 --- a/eth2/utils/tree_hash/src/cached_tree_hash.rs +++ /dev/null @@ -1,309 +0,0 @@ -use super::*; -use hashing::hash; -use int_to_bytes::int_to_bytes32; -use std::ops::Range; - -pub mod btree_overlay; -pub mod impls; -pub mod resize; - -pub use btree_overlay::BTreeOverlay; - -#[derive(Debug, PartialEq, Clone)] -pub enum Error { - ShouldNotProduceBTreeOverlay, - NoFirstNode, - NoBytesForRoot, - UnableToObtainSlices, - UnableToGrowMerkleTree, - UnableToShrinkMerkleTree, - ShouldNeverBePacked(TreeHashType), - BytesAreNotEvenChunks(usize), - NoModifiedFieldForChunk(usize), - NoBytesForChunk(usize), -} - -pub trait CachedTreeHash: CachedTreeHashSubTree + Sized { - fn update_internal_tree_hash_cache(self, old: T) -> Result<(Self, Self), Error>; - - fn cached_tree_hash_root(&self) -> Option>; - - fn clone_without_tree_hash_cache(&self) -> Self; -} - 
-pub trait CachedTreeHashSubTree: TreeHash { - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result; - - fn new_tree_hash_cache(&self) -> Result; - - fn update_tree_hash_cache( - &self, - other: &Item, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result; -} - -fn children(parent: usize) -> (usize, usize) { - ((2 * parent + 1), (2 * parent + 2)) -} - -fn node_range_to_byte_range(node_range: &Range) -> Range { - node_range.start * HASHSIZE..node_range.end * HASHSIZE -} - -/// Split `values` into a power-of-two, identical-length chunks (padding with `0`) and merkleize -/// them, returning the entire merkle tree. -/// -/// The root hash is `merkleize(values)[0..BYTES_PER_CHUNK]`. -pub fn merkleize(values: Vec) -> Vec { - let values = sanitise_bytes(values); - - let leaves = values.len() / HASHSIZE; - - if leaves == 0 { - panic!("No full leaves"); - } - - if !leaves.is_power_of_two() { - panic!("leaves is not power of two"); - } - - let mut o: Vec = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE]; - o.append(&mut values.to_vec()); - - let mut i = o.len(); - let mut j = o.len() - values.len(); - - while i >= MERKLE_HASH_CHUNCK { - i -= MERKLE_HASH_CHUNCK; - let hash = hash(&o[i..i + MERKLE_HASH_CHUNCK]); - - j -= HASHSIZE; - o[j..j + HASHSIZE].copy_from_slice(&hash); - } - - o -} - -pub fn sanitise_bytes(mut bytes: Vec) -> Vec { - let present_leaves = num_unsanitized_leaves(bytes.len()); - let required_leaves = present_leaves.next_power_of_two(); - - if (present_leaves != required_leaves) | last_leaf_needs_padding(bytes.len()) { - bytes.resize(num_bytes(required_leaves), 0); - } - - bytes -} - -fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { - let required_leaves = num_leaves.next_power_of_two(); - - bytes.resize( - bytes.len() + (required_leaves - num_leaves) * BYTES_PER_CHUNK, - 0, - ); -} - -fn last_leaf_needs_padding(num_bytes: usize) -> bool { - num_bytes % HASHSIZE != 0 -} - -/// Rounds up -fn num_unsanitized_leaves(num_bytes: usize) 
-> usize { - (num_bytes + HASHSIZE - 1) / HASHSIZE -} - -fn num_bytes(num_leaves: usize) -> usize { - num_leaves * HASHSIZE -} - -#[derive(Debug, PartialEq, Clone)] -pub struct TreeHashCache { - cache: Vec, - chunk_modified: Vec, -} - -impl Into> for TreeHashCache { - fn into(self) -> Vec { - self.cache - } -} - -impl TreeHashCache { - pub fn new(item: &T) -> Result - where - T: CachedTreeHashSubTree, - { - item.new_tree_hash_cache() - } - - pub fn from_elems(cache: Vec, chunk_modified: Vec) -> Self { - Self { - cache, - chunk_modified, - } - } - - pub fn from_leaves_and_subtrees( - item: &T, - leaves_and_subtrees: Vec, - ) -> Result - where - T: CachedTreeHashSubTree, - { - let offset_handler = BTreeOverlay::new(item, 0)?; - - // Note how many leaves were provided. If is not a power-of-two, we'll need to pad it out - // later. - let num_provided_leaf_nodes = leaves_and_subtrees.len(); - - // Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill - // all the to-be-built internal nodes with zeros and append the leaves and subtrees. - let internal_node_bytes = offset_handler.num_internal_nodes * BYTES_PER_CHUNK; - let leaves_and_subtrees_bytes = leaves_and_subtrees - .iter() - .fold(0, |acc, t| acc + t.bytes_len()); - let mut cache = Vec::with_capacity(leaves_and_subtrees_bytes + internal_node_bytes); - cache.resize(internal_node_bytes, 0); - - // Allocate enough bytes to store all the leaves. - let mut leaves = Vec::with_capacity(offset_handler.num_leaf_nodes * HASHSIZE); - - // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then - // concatenating their merkle trees. - for t in leaves_and_subtrees { - leaves.append(&mut t.root().ok_or_else(|| Error::NoBytesForRoot)?.to_vec()); - cache.append(&mut t.into_merkle_tree()); - } - - // Pad the leaves to an even power-of-two, using zeros. 
- pad_for_leaf_count(num_provided_leaf_nodes, &mut cache); - - // Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros - // internal nodes created earlier with the internal nodes generated by `merkleize`. - let mut merkleized = merkleize(leaves); - merkleized.split_off(internal_node_bytes); - cache.splice(0..internal_node_bytes, merkleized); - - Ok(Self { - chunk_modified: vec![false; cache.len() / BYTES_PER_CHUNK], - cache, - }) - } - - pub fn from_bytes(bytes: Vec, initial_modified_state: bool) -> Result { - if bytes.len() % BYTES_PER_CHUNK > 0 { - return Err(Error::BytesAreNotEvenChunks(bytes.len())); - } - - Ok(Self { - chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK], - cache: bytes, - }) - } - - pub fn bytes_len(&self) -> usize { - self.cache.len() - } - - pub fn root(&self) -> Option<&[u8]> { - self.cache.get(0..HASHSIZE) - } - - pub fn splice(&mut self, chunk_range: Range, replace_with: Self) { - let (bytes, bools) = replace_with.into_components(); - - // Update the `chunk_modified` vec, marking all spliced-in nodes as changed. - self.chunk_modified.splice(chunk_range.clone(), bools); - self.cache - .splice(node_range_to_byte_range(&chunk_range), bytes); - } - - pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { - let start = chunk * BYTES_PER_CHUNK; - let end = start + BYTES_PER_CHUNK; - - if !self.chunk_equals(chunk, to)? { - self.cache - .get_mut(start..end) - .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))? 
- .copy_from_slice(to); - self.chunk_modified[chunk] = true; - } - - Ok(()) - } - - pub fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { - Some(( - self.cache.get(node_range_to_byte_range(&chunk_range))?, - self.chunk_modified.get(chunk_range)?, - )) - } - - pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { - let start = chunk * BYTES_PER_CHUNK; - let end = start + BYTES_PER_CHUNK; - - self.cache - .get_mut(start..end) - .ok_or_else(|| Error::NoBytesForChunk(chunk))? - .copy_from_slice(to); - - self.chunk_modified[chunk] = true; - - Ok(()) - } - - pub fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { - let start = chunk * BYTES_PER_CHUNK; - let end = start + BYTES_PER_CHUNK; - - Ok(self - .cache - .get(start..end) - .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?) - } - - pub fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { - Ok(self.get_chunk(chunk)? == other) - } - - pub fn changed(&self, chunk: usize) -> Result { - self.chunk_modified - .get(chunk) - .cloned() - .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk)) - } - - pub fn either_modified(&self, children: (&usize, &usize)) -> Result { - Ok(self.changed(*children.0)? | self.changed(*children.1)?) 
- } - - pub fn hash_children(&self, children: (&usize, &usize)) -> Result, Error> { - let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2); - child_bytes.append(&mut self.get_chunk(*children.0)?.to_vec()); - child_bytes.append(&mut self.get_chunk(*children.1)?.to_vec()); - - Ok(hash(&child_bytes)) - } - - pub fn mix_in_length(&self, chunk: usize, length: usize) -> Result, Error> { - let mut bytes = Vec::with_capacity(2 * BYTES_PER_CHUNK); - - bytes.append(&mut self.get_chunk(chunk)?.to_vec()); - bytes.append(&mut int_to_bytes32(length as u64)); - - Ok(hash(&bytes)) - } - - pub fn into_merkle_tree(self) -> Vec { - self.cache - } - - pub fn into_components(self) -> (Vec, Vec) { - (self.cache, self.chunk_modified) - } -} diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs b/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs deleted file mode 100644 index e8c04a91e..000000000 --- a/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs +++ /dev/null @@ -1,100 +0,0 @@ -use super::*; - -#[derive(Debug)] -pub struct BTreeOverlay { - pub num_internal_nodes: usize, - pub num_leaf_nodes: usize, - pub first_node: usize, - pub next_node: usize, - offsets: Vec, -} - -impl BTreeOverlay { - pub fn new(item: &T, initial_offset: usize) -> Result - where - T: CachedTreeHashSubTree, - { - item.tree_hash_cache_overlay(initial_offset) - } - - pub fn from_lengths(offset: usize, mut lengths: Vec) -> Result { - // Extend it to the next power-of-two, if it is not already. 
- let num_leaf_nodes = if lengths.len().is_power_of_two() { - lengths.len() - } else { - let num_leaf_nodes = lengths.len().next_power_of_two(); - lengths.resize(num_leaf_nodes, 1); - num_leaf_nodes - }; - - let num_nodes = num_nodes(num_leaf_nodes); - let num_internal_nodes = num_nodes - num_leaf_nodes; - - let mut offsets = Vec::with_capacity(num_nodes); - offsets.append(&mut (offset..offset + num_internal_nodes).collect()); - - let mut next_node = num_internal_nodes + offset; - for i in 0..num_leaf_nodes { - offsets.push(next_node); - next_node += lengths[i]; - } - - Ok(Self { - num_internal_nodes, - num_leaf_nodes, - offsets, - first_node: offset, - next_node, - }) - } - - pub fn root(&self) -> usize { - self.first_node - } - - pub fn height(&self) -> usize { - self.num_leaf_nodes.trailing_zeros() as usize - } - - pub fn chunk_range(&self) -> Range { - self.first_node..self.next_node - } - - pub fn total_chunks(&self) -> usize { - self.next_node - self.first_node - } - - pub fn total_nodes(&self) -> usize { - self.num_internal_nodes + self.num_leaf_nodes - } - - pub fn first_leaf_node(&self) -> Result { - self.offsets - .get(self.num_internal_nodes) - .cloned() - .ok_or_else(|| Error::NoFirstNode) - } - - /// Returns an iterator visiting each internal node, providing the left and right child chunks - /// for the node. - pub fn iter_internal_nodes<'a>( - &'a self, - ) -> impl DoubleEndedIterator { - let internal_nodes = &self.offsets[0..self.num_internal_nodes]; - - internal_nodes.iter().enumerate().map(move |(i, parent)| { - let children = children(i); - ( - parent, - (&self.offsets[children.0], &self.offsets[children.1]), - ) - }) - } - - /// Returns an iterator visiting each leaf node, providing the chunk for that node. 
- pub fn iter_leaf_nodes<'a>(&'a self) -> impl DoubleEndedIterator { - let leaf_nodes = &self.offsets[self.num_internal_nodes..]; - - leaf_nodes.iter() - } -} diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs b/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs deleted file mode 100644 index 6500e4eff..000000000 --- a/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs +++ /dev/null @@ -1,31 +0,0 @@ -use super::resize::{grow_merkle_cache, shrink_merkle_cache}; -use super::*; - -mod vec; - -impl CachedTreeHashSubTree for u64 { - fn new_tree_hash_cache(&self) -> Result { - Ok(TreeHashCache::from_bytes( - merkleize(self.to_le_bytes().to_vec()), - false, - )?) - } - - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { - BTreeOverlay::from_lengths(chunk_offset, vec![1]) - } - - fn update_tree_hash_cache( - &self, - other: &Self, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result { - if self != other { - let leaf = merkleize(self.to_le_bytes().to_vec()); - cache.modify_chunk(chunk, &leaf)?; - } - - Ok(chunk + 1) - } -} diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs b/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs deleted file mode 100644 index 1cd7eb902..000000000 --- a/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs +++ /dev/null @@ -1,171 +0,0 @@ -use super::*; - -impl CachedTreeHashSubTree> for Vec -where - T: CachedTreeHashSubTree + TreeHash, -{ - fn new_tree_hash_cache(&self) -> Result { - match T::tree_hash_type() { - TreeHashType::Basic => { - TreeHashCache::from_bytes(merkleize(get_packed_leaves(self)?), false) - } - TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { - let subtrees = self - .iter() - .map(|item| TreeHashCache::new(item)) - .collect::, _>>()?; - - TreeHashCache::from_leaves_and_subtrees(self, subtrees) - } - } - } - - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { - let lengths = match T::tree_hash_type() { - TreeHashType::Basic => 
vec![1; self.len() / T::tree_hash_packing_factor()], - TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { - let mut lengths = vec![]; - - for item in self { - lengths.push(BTreeOverlay::new(item, 0)?.total_nodes()) - } - - lengths - } - }; - - BTreeOverlay::from_lengths(chunk_offset, lengths) - } - - fn update_tree_hash_cache( - &self, - other: &Vec, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result { - let offset_handler = BTreeOverlay::new(self, chunk)?; - let old_offset_handler = BTreeOverlay::new(other, chunk)?; - - if offset_handler.num_leaf_nodes != old_offset_handler.num_leaf_nodes { - let old_offset_handler = BTreeOverlay::new(other, chunk)?; - - // Get slices of the exsiting tree from the cache. - let (old_bytes, old_flags) = cache - .slices(old_offset_handler.chunk_range()) - .ok_or_else(|| Error::UnableToObtainSlices)?; - - let (new_bytes, new_flags) = - if offset_handler.num_leaf_nodes > old_offset_handler.num_leaf_nodes { - grow_merkle_cache( - old_bytes, - old_flags, - old_offset_handler.height(), - offset_handler.height(), - ) - .ok_or_else(|| Error::UnableToGrowMerkleTree)? - } else { - shrink_merkle_cache( - old_bytes, - old_flags, - old_offset_handler.height(), - offset_handler.height(), - offset_handler.total_chunks(), - ) - .ok_or_else(|| Error::UnableToShrinkMerkleTree)? - }; - - // Create a `TreeHashCache` from the raw elements. - let modified_cache = TreeHashCache::from_elems(new_bytes, new_flags); - - // Splice the newly created `TreeHashCache` over the existing elements. 
- cache.splice(old_offset_handler.chunk_range(), modified_cache); - } - - match T::tree_hash_type() { - TreeHashType::Basic => { - let leaves = get_packed_leaves(self)?; - - for (i, chunk) in offset_handler.iter_leaf_nodes().enumerate() { - if let Some(latest) = leaves.get(i * HASHSIZE..(i + 1) * HASHSIZE) { - cache.maybe_update_chunk(*chunk, latest)?; - } - } - let first_leaf_chunk = offset_handler.first_leaf_node()?; - - cache.splice( - first_leaf_chunk..offset_handler.next_node, - TreeHashCache::from_bytes(leaves, true)?, - ); - } - TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { - let mut i = offset_handler.num_leaf_nodes; - for &start_chunk in offset_handler.iter_leaf_nodes().rev() { - i -= 1; - match (other.get(i), self.get(i)) { - // The item existed in the previous list and exsits in the current list. - (Some(old), Some(new)) => { - new.update_tree_hash_cache(old, cache, start_chunk)?; - } - // The item existed in the previous list but does not exist in this list. - // - // I.e., the list has been shortened. - (Some(old), None) => { - // Splice out the entire tree of the removed node, replacing it with a - // single padding node. - let end_chunk = BTreeOverlay::new(old, start_chunk)?.next_node; - - cache.splice( - start_chunk..end_chunk, - TreeHashCache::from_bytes(vec![0; HASHSIZE], true)?, - ); - } - // The item existed in the previous list but does exist in this list. - // - // I.e., the list has been lengthened. - (None, Some(new)) => { - let bytes: Vec = TreeHashCache::new(new)?.into(); - - cache.splice( - start_chunk..start_chunk + 1, - TreeHashCache::from_bytes(bytes, true)?, - ); - } - // The item didn't exist in the old list and doesn't exist in the new list, - // nothing to do. - (None, None) => {} - }; - } - } - } - - for (&parent, children) in offset_handler.iter_internal_nodes().rev() { - if cache.either_modified(children)? 
{ - cache.modify_chunk(parent, &cache.hash_children(children)?)?; - } - } - - // If the root node or the length has changed, mix in the length of the list. - let root_node = offset_handler.root(); - if cache.changed(root_node)? | (self.len() != other.len()) { - cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?; - } - - Ok(offset_handler.next_node) - } -} - -fn get_packed_leaves(vec: &Vec) -> Result, Error> -where - T: CachedTreeHashSubTree, -{ - let num_packed_bytes = (BYTES_PER_CHUNK / T::tree_hash_packing_factor()) * vec.len(); - let num_leaves = num_sanitized_leaves(num_packed_bytes); - - let mut packed = Vec::with_capacity(num_leaves * HASHSIZE); - - for item in vec { - packed.append(&mut item.tree_hash_packed_encoding()); - } - - Ok(sanitise_bytes(packed)) -} diff --git a/eth2/utils/tree_hash/src/standard_tree_hash/impls.rs b/eth2/utils/tree_hash/src/impls.rs similarity index 71% rename from eth2/utils/tree_hash/src/standard_tree_hash/impls.rs rename to eth2/utils/tree_hash/src/impls.rs index be6b4ba07..42ea9add0 100644 --- a/eth2/utils/tree_hash/src/standard_tree_hash/impls.rs +++ b/eth2/utils/tree_hash/src/impls.rs @@ -1,5 +1,8 @@ use super::*; +use crate::merkleize::merkle_root; use ethereum_types::H256; +use hashing::hash; +use int_to_bytes::int_to_bytes32; macro_rules! impl_for_bitsize { ($type: ident, $bit_size: expr) => { @@ -16,6 +19,7 @@ macro_rules! 
impl_for_bitsize { HASHSIZE / ($bit_size / 8) } + #[allow(clippy::cast_lossless)] fn tree_hash_root(&self) -> Vec { int_to_bytes32(*self as u64) } @@ -49,15 +53,15 @@ impl TreeHash for bool { impl TreeHash for [u8; 4] { fn tree_hash_type() -> TreeHashType { - TreeHashType::List + TreeHashType::Vector } fn tree_hash_packed_encoding(&self) -> Vec { - panic!("bytesN should never be packed.") + unreachable!("bytesN should never be packed.") } fn tree_hash_packing_factor() -> usize { - panic!("bytesN should never be packed.") + unreachable!("bytesN should never be packed.") } fn tree_hash_root(&self) -> Vec { @@ -83,31 +87,38 @@ impl TreeHash for H256 { } } -impl TreeHash for Vec -where - T: TreeHash, -{ - fn tree_hash_type() -> TreeHashType { - TreeHashType::List - } +macro_rules! impl_for_list { + ($type: ty) => { + impl TreeHash for $type + where + T: TreeHash, + { + fn tree_hash_type() -> TreeHashType { + TreeHashType::List + } - fn tree_hash_packed_encoding(&self) -> Vec { - unreachable!("List should never be packed.") - } + fn tree_hash_packed_encoding(&self) -> Vec { + unreachable!("List should never be packed.") + } - fn tree_hash_packing_factor() -> usize { - unreachable!("List should never be packed.") - } + fn tree_hash_packing_factor() -> usize { + unreachable!("List should never be packed.") + } - fn tree_hash_root(&self) -> Vec { - let mut root_and_len = Vec::with_capacity(HASHSIZE * 2); - root_and_len.append(&mut vec_tree_hash_root(self)); - root_and_len.append(&mut int_to_bytes32(self.len() as u64)); + fn tree_hash_root(&self) -> Vec { + let mut root_and_len = Vec::with_capacity(HASHSIZE * 2); + root_and_len.append(&mut vec_tree_hash_root(self)); + root_and_len.append(&mut int_to_bytes32(self.len() as u64)); - hash(&root_and_len) - } + hash(&root_and_len) + } + } + }; } +impl_for_list!(Vec); +impl_for_list!(&[T]); + pub fn vec_tree_hash_root(vec: &[T]) -> Vec where T: TreeHash, diff --git a/eth2/utils/tree_hash/src/lib.rs 
b/eth2/utils/tree_hash/src/lib.rs index ed60079c8..2554e70c3 100644 --- a/eth2/utils/tree_hash/src/lib.rs +++ b/eth2/utils/tree_hash/src/lib.rs @@ -1,14 +1,9 @@ -pub mod cached_tree_hash; -pub mod signed_root; -pub mod standard_tree_hash; +pub mod impls; +pub mod merkleize; pub const BYTES_PER_CHUNK: usize = 32; pub const HASHSIZE: usize = 32; -pub const MERKLE_HASH_CHUNCK: usize = 2 * BYTES_PER_CHUNK; - -pub use cached_tree_hash::{BTreeOverlay, CachedTreeHashSubTree, Error, TreeHashCache}; -pub use signed_root::SignedRoot; -pub use standard_tree_hash::{merkle_root, TreeHash}; +pub const MERKLE_HASH_CHUNK: usize = 2 * BYTES_PER_CHUNK; #[derive(Debug, PartialEq, Clone)] pub enum TreeHashType { @@ -18,13 +13,18 @@ pub enum TreeHashType { Container, } -fn num_sanitized_leaves(num_bytes: usize) -> usize { - let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE; - leaves.next_power_of_two() +pub trait TreeHash { + fn tree_hash_type() -> TreeHashType; + + fn tree_hash_packed_encoding(&self) -> Vec; + + fn tree_hash_packing_factor() -> usize; + + fn tree_hash_root(&self) -> Vec; } -fn num_nodes(num_leaves: usize) -> usize { - 2 * num_leaves - 1 +pub trait SignedRoot: TreeHash { + fn signed_root(&self) -> Vec; } #[macro_export] @@ -44,11 +44,12 @@ macro_rules! tree_hash_ssz_encoding_as_vector { } fn tree_hash_root(&self) -> Vec { - tree_hash::merkle_root(&ssz::ssz_encode(self)) + tree_hash::merkleize::merkle_root(&ssz::ssz_encode(self)) } } }; } + #[macro_export] macro_rules! 
tree_hash_ssz_encoding_as_list { ($type: ident) => { diff --git a/eth2/utils/tree_hash/src/standard_tree_hash.rs b/eth2/utils/tree_hash/src/merkleize.rs similarity index 74% rename from eth2/utils/tree_hash/src/standard_tree_hash.rs rename to eth2/utils/tree_hash/src/merkleize.rs index 812a2c352..9482895ec 100644 --- a/eth2/utils/tree_hash/src/standard_tree_hash.rs +++ b/eth2/utils/tree_hash/src/merkleize.rs @@ -1,20 +1,5 @@ use super::*; use hashing::hash; -use int_to_bytes::int_to_bytes32; - -pub use impls::vec_tree_hash_root; - -mod impls; - -pub trait TreeHash { - fn tree_hash_type() -> TreeHashType; - - fn tree_hash_packed_encoding(&self) -> Vec; - - fn tree_hash_packing_factor() -> usize; - - fn tree_hash_root(&self) -> Vec; -} pub fn merkle_root(bytes: &[u8]) -> Vec { // TODO: replace this with a more memory efficient method. @@ -41,16 +26,16 @@ pub fn efficient_merkleize(bytes: &[u8]) -> Vec { assert_eq!(o.len(), num_bytes); - let empty_chunk_hash = hash(&[0; MERKLE_HASH_CHUNCK]); + let empty_chunk_hash = hash(&[0; MERKLE_HASH_CHUNK]); let mut i = nodes * HASHSIZE; let mut j = internal_nodes * HASHSIZE; - while i >= MERKLE_HASH_CHUNCK { - i -= MERKLE_HASH_CHUNCK; + while i >= MERKLE_HASH_CHUNK { + i -= MERKLE_HASH_CHUNK; j -= HASHSIZE; - let hash = match o.get(i..i + MERKLE_HASH_CHUNCK) { + let hash = match o.get(i..i + MERKLE_HASH_CHUNK) { // All bytes are available, hash as ususal. Some(slice) => hash(slice), // Unable to get all the bytes. @@ -59,7 +44,7 @@ pub fn efficient_merkleize(bytes: &[u8]) -> Vec { // Able to get some of the bytes, pad them out. Some(slice) => { let mut bytes = slice.to_vec(); - bytes.resize(MERKLE_HASH_CHUNCK, 0); + bytes.resize(MERKLE_HASH_CHUNK, 0); hash(&bytes) } // Unable to get any bytes, use the empty-chunk hash. 
@@ -73,3 +58,12 @@ pub fn efficient_merkleize(bytes: &[u8]) -> Vec { o } + +fn num_sanitized_leaves(num_bytes: usize) -> usize { + let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE; + leaves.next_power_of_two() +} + +fn num_nodes(num_leaves: usize) -> usize { + 2 * num_leaves - 1 +} diff --git a/eth2/utils/tree_hash/src/signed_root.rs b/eth2/utils/tree_hash/src/signed_root.rs deleted file mode 100644 index f7aeca4af..000000000 --- a/eth2/utils/tree_hash/src/signed_root.rs +++ /dev/null @@ -1,5 +0,0 @@ -use crate::TreeHash; - -pub trait SignedRoot: TreeHash { - fn signed_root(&self) -> Vec; -} diff --git a/eth2/utils/tree_hash/tests/tests.rs b/eth2/utils/tree_hash/tests/tests.rs deleted file mode 100644 index 4d2c6f282..000000000 --- a/eth2/utils/tree_hash/tests/tests.rs +++ /dev/null @@ -1,1080 +0,0 @@ -use hashing::hash; -use int_to_bytes::{int_to_bytes32, int_to_bytes8}; -use tree_hash::cached_tree_hash::*; -use tree_hash::standard_tree_hash::*; -use tree_hash::*; - -#[derive(Clone, Debug)] -pub struct InternalCache { - pub a: u64, - pub b: u64, - pub cache: Option, -} - -impl TreeHash for InternalCache { - fn tree_hash_type() -> TreeHashType { - TreeHashType::Container - } - - fn tree_hash_packed_encoding(&self) -> Vec { - unreachable!("Struct should never be packed.") - } - - fn tree_hash_packing_factor() -> usize { - unreachable!("Struct should never be packed.") - } - - fn tree_hash_root(&self) -> Vec { - let mut leaves = Vec::with_capacity(4 * HASHSIZE); - - leaves.append(&mut self.a.tree_hash_root()); - leaves.append(&mut self.b.tree_hash_root()); - - efficient_merkleize(&leaves)[0..32].to_vec() - } -} - -impl CachedTreeHash for InternalCache { - fn update_internal_tree_hash_cache(mut self, mut old: Self) -> Result<(Self, Self), Error> { - let mut local_cache = old.cache; - old.cache = None; - - if let Some(ref mut local_cache) = local_cache { - self.update_tree_hash_cache(&old, local_cache, 0)?; - } else { - local_cache = Some(self.new_tree_hash_cache()?) 
- } - - self.cache = local_cache; - - Ok((old, self)) - } - - fn cached_tree_hash_root(&self) -> Option> { - match &self.cache { - None => None, - Some(c) => Some(c.root()?.to_vec()), - } - } - - fn clone_without_tree_hash_cache(&self) -> Self { - Self { - a: self.a, - b: self.b, - cache: None, - } - } -} - -#[test] -fn works_when_embedded() { - let old = InternalCache { - a: 99, - b: 99, - cache: None, - }; - - let mut new = old.clone_without_tree_hash_cache(); - new.a = 1; - new.b = 2; - - let (_old, new) = new.update_internal_tree_hash_cache(old).unwrap(); - - let root = new.cached_tree_hash_root().unwrap(); - - let leaves = vec![int_to_bytes32(1), int_to_bytes32(2)]; - let merkle = merkleize(join(leaves)); - - assert_eq!(&merkle[0..32], &root[..]); -} - -impl CachedTreeHashSubTree for InternalCache { - fn new_tree_hash_cache(&self) -> Result { - let tree = TreeHashCache::from_leaves_and_subtrees( - self, - vec![self.a.new_tree_hash_cache()?, self.b.new_tree_hash_cache()?], - )?; - - Ok(tree) - } - - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { - let mut lengths = vec![]; - - lengths.push(BTreeOverlay::new(&self.a, 0)?.total_nodes()); - lengths.push(BTreeOverlay::new(&self.b, 0)?.total_nodes()); - - BTreeOverlay::from_lengths(chunk_offset, lengths) - } - - fn update_tree_hash_cache( - &self, - other: &Self, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result { - let offset_handler = BTreeOverlay::new(self, chunk)?; - - // Skip past the internal nodes and update any changed leaf nodes. - { - let chunk = offset_handler.first_leaf_node()?; - let chunk = self.a.update_tree_hash_cache(&other.a, cache, chunk)?; - let _chunk = self.b.update_tree_hash_cache(&other.b, cache, chunk)?; - } - - for (&parent, children) in offset_handler.iter_internal_nodes().rev() { - if cache.either_modified(children)? 
{ - cache.modify_chunk(parent, &cache.hash_children(children)?)?; - } - } - - Ok(offset_handler.next_node) - } -} - -fn num_nodes(num_leaves: usize) -> usize { - 2 * num_leaves - 1 -} - -#[derive(Clone, Debug)] -pub struct Inner { - pub a: u64, - pub b: u64, - pub c: u64, - pub d: u64, -} - -impl TreeHash for Inner { - fn tree_hash_type() -> TreeHashType { - TreeHashType::Container - } - - fn tree_hash_packed_encoding(&self) -> Vec { - unreachable!("Struct should never be packed.") - } - - fn tree_hash_packing_factor() -> usize { - unreachable!("Struct should never be packed.") - } - - fn tree_hash_root(&self) -> Vec { - let mut leaves = Vec::with_capacity(4 * HASHSIZE); - - leaves.append(&mut self.a.tree_hash_root()); - leaves.append(&mut self.b.tree_hash_root()); - leaves.append(&mut self.c.tree_hash_root()); - leaves.append(&mut self.d.tree_hash_root()); - - efficient_merkleize(&leaves)[0..32].to_vec() - } -} - -impl CachedTreeHashSubTree for Inner { - fn new_tree_hash_cache(&self) -> Result { - let tree = TreeHashCache::from_leaves_and_subtrees( - self, - vec![ - self.a.new_tree_hash_cache()?, - self.b.new_tree_hash_cache()?, - self.c.new_tree_hash_cache()?, - self.d.new_tree_hash_cache()?, - ], - )?; - - Ok(tree) - } - - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { - let mut lengths = vec![]; - - lengths.push(BTreeOverlay::new(&self.a, 0)?.total_nodes()); - lengths.push(BTreeOverlay::new(&self.b, 0)?.total_nodes()); - lengths.push(BTreeOverlay::new(&self.c, 0)?.total_nodes()); - lengths.push(BTreeOverlay::new(&self.d, 0)?.total_nodes()); - - BTreeOverlay::from_lengths(chunk_offset, lengths) - } - - fn update_tree_hash_cache( - &self, - other: &Self, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result { - let offset_handler = BTreeOverlay::new(self, chunk)?; - - // Skip past the internal nodes and update any changed leaf nodes. 
- { - let chunk = offset_handler.first_leaf_node()?; - let chunk = self.a.update_tree_hash_cache(&other.a, cache, chunk)?; - let chunk = self.b.update_tree_hash_cache(&other.b, cache, chunk)?; - let chunk = self.c.update_tree_hash_cache(&other.c, cache, chunk)?; - let _chunk = self.d.update_tree_hash_cache(&other.d, cache, chunk)?; - } - - for (&parent, children) in offset_handler.iter_internal_nodes().rev() { - if cache.either_modified(children)? { - cache.modify_chunk(parent, &cache.hash_children(children)?)?; - } - } - - Ok(offset_handler.next_node) - } -} - -#[derive(Clone, Debug)] -pub struct Outer { - pub a: u64, - pub b: Inner, - pub c: u64, -} - -impl TreeHash for Outer { - fn tree_hash_type() -> TreeHashType { - TreeHashType::Container - } - - fn tree_hash_packed_encoding(&self) -> Vec { - unreachable!("Struct should never be packed.") - } - - fn tree_hash_packing_factor() -> usize { - unreachable!("Struct should never be packed.") - } - - fn tree_hash_root(&self) -> Vec { - let mut leaves = Vec::with_capacity(4 * HASHSIZE); - - leaves.append(&mut self.a.tree_hash_root()); - leaves.append(&mut self.b.tree_hash_root()); - leaves.append(&mut self.c.tree_hash_root()); - - efficient_merkleize(&leaves)[0..32].to_vec() - } -} - -impl CachedTreeHashSubTree for Outer { - fn new_tree_hash_cache(&self) -> Result { - let tree = TreeHashCache::from_leaves_and_subtrees( - self, - vec![ - self.a.new_tree_hash_cache()?, - self.b.new_tree_hash_cache()?, - self.c.new_tree_hash_cache()?, - ], - )?; - - Ok(tree) - } - - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { - let mut lengths = vec![]; - - lengths.push(BTreeOverlay::new(&self.a, 0)?.total_nodes()); - lengths.push(BTreeOverlay::new(&self.b, 0)?.total_nodes()); - lengths.push(BTreeOverlay::new(&self.c, 0)?.total_nodes()); - - BTreeOverlay::from_lengths(chunk_offset, lengths) - } - - fn update_tree_hash_cache( - &self, - other: &Self, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result { - let 
offset_handler = BTreeOverlay::new(self, chunk)?; - - // Skip past the internal nodes and update any changed leaf nodes. - { - let chunk = offset_handler.first_leaf_node()?; - let chunk = self.a.update_tree_hash_cache(&other.a, cache, chunk)?; - let chunk = self.b.update_tree_hash_cache(&other.b, cache, chunk)?; - let _chunk = self.c.update_tree_hash_cache(&other.c, cache, chunk)?; - } - - for (&parent, children) in offset_handler.iter_internal_nodes().rev() { - if cache.either_modified(children)? { - cache.modify_chunk(parent, &cache.hash_children(children)?)?; - } - } - - Ok(offset_handler.next_node) - } -} - -fn join(many: Vec>) -> Vec { - let mut all = vec![]; - for one in many { - all.extend_from_slice(&mut one.clone()) - } - all -} - -#[test] -fn partial_modification_to_inner_struct() { - let original_inner = Inner { - a: 1, - b: 2, - c: 3, - d: 4, - }; - - let original_outer = Outer { - a: 0, - b: original_inner.clone(), - c: 5, - }; - - let modified_inner = Inner { - a: 42, - ..original_inner.clone() - }; - - // Modify outer - let modified_outer = Outer { - b: modified_inner.clone(), - ..original_outer.clone() - }; - - // Perform a differential hash - let mut cache_struct = TreeHashCache::new(&original_outer).unwrap(); - - modified_outer - .update_tree_hash_cache(&original_outer, &mut cache_struct, 0) - .unwrap(); - - let modified_cache: Vec = cache_struct.into(); - - // Generate reference data. 
- let mut data = vec![]; - data.append(&mut int_to_bytes32(0)); - let inner_bytes: Vec = TreeHashCache::new(&modified_inner).unwrap().into(); - data.append(&mut int_to_bytes32(5)); - - let leaves = vec![ - int_to_bytes32(0), - inner_bytes[0..32].to_vec(), - int_to_bytes32(5), - vec![0; 32], // padding - ]; - let mut merkle = merkleize(join(leaves)); - merkle.splice(4 * 32..5 * 32, inner_bytes); - - assert_eq!(merkle.len() / HASHSIZE, 13); - assert_eq!(modified_cache.len() / HASHSIZE, 13); - - assert_eq!(merkle, modified_cache); -} - -#[test] -fn partial_modification_to_outer() { - let inner = Inner { - a: 1, - b: 2, - c: 3, - d: 4, - }; - - let original_outer = Outer { - a: 0, - b: inner.clone(), - c: 5, - }; - - // Build the initial cache. - // let original_cache = original_outer.build_cache_bytes(); - - // Modify outer - let modified_outer = Outer { - c: 42, - ..original_outer.clone() - }; - - // Perform a differential hash - let mut cache_struct = TreeHashCache::new(&original_outer).unwrap(); - - modified_outer - .update_tree_hash_cache(&original_outer, &mut cache_struct, 0) - .unwrap(); - - let modified_cache: Vec = cache_struct.into(); - - // Generate reference data. - let mut data = vec![]; - data.append(&mut int_to_bytes32(0)); - let inner_bytes: Vec = TreeHashCache::new(&inner).unwrap().into(); - data.append(&mut int_to_bytes32(5)); - - let leaves = vec![ - int_to_bytes32(0), - inner_bytes[0..32].to_vec(), - int_to_bytes32(42), - vec![0; 32], // padding - ]; - let mut merkle = merkleize(join(leaves)); - merkle.splice(4 * 32..5 * 32, inner_bytes); - - assert_eq!(merkle.len() / HASHSIZE, 13); - assert_eq!(modified_cache.len() / HASHSIZE, 13); - - assert_eq!(merkle, modified_cache); -} - -#[test] -fn outer_builds() { - let inner = Inner { - a: 1, - b: 2, - c: 3, - d: 4, - }; - - let outer = Outer { - a: 0, - b: inner.clone(), - c: 5, - }; - - // Build the function output. 
- let cache: Vec = TreeHashCache::new(&outer).unwrap().into(); - - // Generate reference data. - let mut data = vec![]; - data.append(&mut int_to_bytes32(0)); - let inner_bytes: Vec = TreeHashCache::new(&inner).unwrap().into(); - data.append(&mut int_to_bytes32(5)); - - let leaves = vec![ - int_to_bytes32(0), - inner_bytes[0..32].to_vec(), - int_to_bytes32(5), - vec![0; 32], // padding - ]; - let mut merkle = merkleize(join(leaves)); - merkle.splice(4 * 32..5 * 32, inner_bytes); - - assert_eq!(merkle.len() / HASHSIZE, 13); - assert_eq!(cache.len() / HASHSIZE, 13); - - assert_eq!(merkle, cache); -} - -fn mix_in_length(root: &mut [u8], len: usize) { - let mut bytes = root.to_vec(); - bytes.append(&mut int_to_bytes32(len as u64)); - - root.copy_from_slice(&hash(&bytes)); -} - -/// Generic test that covers: -/// -/// 1. Produce a new cache from `original`. -/// 2. Do a differential hash between `original` and `modified`. -/// 3. Test that the cache generated matches the one we generate manually. -/// -/// In effect it ensures that we can do a differential hash between two `Vec`. -fn test_u64_vec_modifications(original: Vec, modified: Vec) { - // Generate initial cache. - let original_cache: Vec = TreeHashCache::new(&original).unwrap().into(); - - // Perform a differential hash - let mut cache_struct = TreeHashCache::from_bytes(original_cache.clone(), false).unwrap(); - modified - .update_tree_hash_cache(&original, &mut cache_struct, 0) - .unwrap(); - let modified_cache: Vec = cache_struct.into(); - - // Generate reference data. 
- let mut data = vec![]; - for i in &modified { - data.append(&mut int_to_bytes8(*i)); - } - let data = sanitise_bytes(data); - let mut expected = merkleize(data); - - mix_in_length(&mut expected[0..HASHSIZE], modified.len()); - - assert_eq!(expected, modified_cache); - assert_eq!(&expected[0..32], &modified.tree_hash_root()[..]); -} - -#[test] -fn partial_modification_u64_vec() { - let n: u64 = 2_u64.pow(5); - - let original_vec: Vec = (0..n).collect(); - - let mut modified_vec = original_vec.clone(); - modified_vec[n as usize - 1] = 42; - - test_u64_vec_modifications(original_vec, modified_vec); -} - -#[test] -fn shortened_u64_vec_len_within_pow_2_boundary() { - let n: u64 = 2_u64.pow(5) - 1; - - let original_vec: Vec = (0..n).collect(); - - let mut modified_vec = original_vec.clone(); - modified_vec.pop(); - - test_u64_vec_modifications(original_vec, modified_vec); -} - -#[test] -fn shortened_u64_vec_len_outside_pow_2_boundary() { - let original_vec: Vec = (0..2_u64.pow(6)).collect(); - - let modified_vec: Vec = (0..2_u64.pow(5)).collect(); - - test_u64_vec_modifications(original_vec, modified_vec); -} - -#[test] -fn extended_u64_vec_len_within_pow_2_boundary() { - let n: u64 = 2_u64.pow(5) - 2; - - let original_vec: Vec = (0..n).collect(); - - let mut modified_vec = original_vec.clone(); - modified_vec.push(42); - - test_u64_vec_modifications(original_vec, modified_vec); -} - -#[test] -fn extended_u64_vec_len_outside_pow_2_boundary() { - let original_vec: Vec = (0..2_u64.pow(5)).collect(); - - let modified_vec: Vec = (0..2_u64.pow(6)).collect(); - - test_u64_vec_modifications(original_vec, modified_vec); -} - -#[test] -fn large_vec_of_u64_builds() { - let n: u64 = 50; - - let my_vec: Vec = (0..n).collect(); - - // Generate function output. - let cache: Vec = TreeHashCache::new(&my_vec).unwrap().into(); - - // Generate reference data. 
- let mut data = vec![]; - for i in &my_vec { - data.append(&mut int_to_bytes8(*i)); - } - let data = sanitise_bytes(data); - let expected = merkleize(data); - - assert_eq!(expected, cache); -} - -/// Generic test that covers: -/// -/// 1. Produce a new cache from `original`. -/// 2. Do a differential hash between `original` and `modified`. -/// 3. Test that the cache generated matches the one we generate manually. -/// -/// The `reference` vec is used to build the tree hash cache manually. `Inner` is just 4x `u64`, so -/// you can represent 2x `Inner` with a `reference` vec of len 8. -/// -/// In effect it ensures that we can do a differential hash between two `Vec`. -fn test_inner_vec_modifications(original: Vec, modified: Vec, reference: Vec) { - let mut cache = TreeHashCache::new(&original).unwrap(); - - modified - .update_tree_hash_cache(&original, &mut cache, 0) - .unwrap(); - let modified_cache: Vec = cache.into(); - - // Build the reference vec. - - let mut leaves = vec![]; - let mut full_bytes = vec![]; - - for n in reference.chunks(4) { - let mut merkle = merkleize(join(vec![ - int_to_bytes32(n[0]), - int_to_bytes32(n[1]), - int_to_bytes32(n[2]), - int_to_bytes32(n[3]), - ])); - leaves.append(&mut merkle[0..HASHSIZE].to_vec()); - full_bytes.append(&mut merkle); - } - - let num_leaves = leaves.len() / HASHSIZE; - let mut expected = merkleize(leaves); - - let num_internal_nodes = num_leaves.next_power_of_two() - 1; - expected.splice(num_internal_nodes * HASHSIZE.., full_bytes); - - for _ in num_leaves..num_leaves.next_power_of_two() { - expected.append(&mut vec![0; HASHSIZE]); - } - - mix_in_length(&mut expected[0..HASHSIZE], modified.len()); - - // Compare the cached tree to the reference tree. 
- assert_trees_eq(&expected, &modified_cache); - assert_eq!(&expected[0..32], &modified.tree_hash_root()[..]); -} - -#[test] -fn partial_modification_of_vec_of_inner() { - let original = vec![ - Inner { - a: 0, - b: 1, - c: 2, - d: 3, - }, - Inner { - a: 4, - b: 5, - c: 6, - d: 7, - }, - Inner { - a: 8, - b: 9, - c: 10, - d: 11, - }, - ]; - - let mut modified = original.clone(); - modified[1].a = 42; - - let mut reference_vec: Vec = (0..12).collect(); - reference_vec[4] = 42; - - test_inner_vec_modifications(original, modified, reference_vec); -} - -#[test] -fn shortened_vec_of_inner_within_power_of_two_boundary() { - let original = vec![ - Inner { - a: 0, - b: 1, - c: 2, - d: 3, - }, - Inner { - a: 4, - b: 5, - c: 6, - d: 7, - }, - Inner { - a: 8, - b: 9, - c: 10, - d: 11, - }, - Inner { - a: 12, - b: 13, - c: 14, - d: 15, - }, - ]; - - let mut modified = original.clone(); - modified.pop(); // remove the last element from the list. - - let reference_vec: Vec = (0..12).collect(); - - test_inner_vec_modifications(original, modified, reference_vec); -} - -#[test] -fn shortened_vec_of_inner_outside_power_of_two_boundary() { - let original = vec![ - Inner { - a: 0, - b: 1, - c: 2, - d: 3, - }, - Inner { - a: 4, - b: 5, - c: 6, - d: 7, - }, - Inner { - a: 8, - b: 9, - c: 10, - d: 11, - }, - Inner { - a: 12, - b: 13, - c: 14, - d: 15, - }, - Inner { - a: 16, - b: 17, - c: 18, - d: 19, - }, - ]; - - let mut modified = original.clone(); - modified.pop(); // remove the last element from the list. 
- - let reference_vec: Vec = (0..16).collect(); - - test_inner_vec_modifications(original, modified, reference_vec); -} - -#[test] -fn lengthened_vec_of_inner_within_power_of_two_boundary() { - let original = vec![ - Inner { - a: 0, - b: 1, - c: 2, - d: 3, - }, - Inner { - a: 4, - b: 5, - c: 6, - d: 7, - }, - Inner { - a: 8, - b: 9, - c: 10, - d: 11, - }, - ]; - - let mut modified = original.clone(); - modified.push(Inner { - a: 12, - b: 13, - c: 14, - d: 15, - }); - - let reference_vec: Vec = (0..16).collect(); - - test_inner_vec_modifications(original, modified, reference_vec); -} - -#[test] -fn lengthened_vec_of_inner_outside_power_of_two_boundary() { - let original = vec![ - Inner { - a: 0, - b: 1, - c: 2, - d: 3, - }, - Inner { - a: 4, - b: 5, - c: 6, - d: 7, - }, - Inner { - a: 8, - b: 9, - c: 10, - d: 11, - }, - Inner { - a: 12, - b: 13, - c: 14, - d: 15, - }, - ]; - - let mut modified = original.clone(); - modified.push(Inner { - a: 16, - b: 17, - c: 18, - d: 19, - }); - - let reference_vec: Vec = (0..20).collect(); - - test_inner_vec_modifications(original, modified, reference_vec); -} - -#[test] -fn vec_of_inner_builds() { - let numbers: Vec = (0..12).collect(); - - let mut leaves = vec![]; - let mut full_bytes = vec![]; - - for n in numbers.chunks(4) { - let mut merkle = merkleize(join(vec![ - int_to_bytes32(n[0]), - int_to_bytes32(n[1]), - int_to_bytes32(n[2]), - int_to_bytes32(n[3]), - ])); - leaves.append(&mut merkle[0..HASHSIZE].to_vec()); - full_bytes.append(&mut merkle); - } - - let mut expected = merkleize(leaves); - expected.splice(3 * HASHSIZE.., full_bytes); - expected.append(&mut vec![0; HASHSIZE]); - - let my_vec = vec![ - Inner { - a: 0, - b: 1, - c: 2, - d: 3, - }, - Inner { - a: 4, - b: 5, - c: 6, - d: 7, - }, - Inner { - a: 8, - b: 9, - c: 10, - d: 11, - }, - ]; - - let cache: Vec = TreeHashCache::new(&my_vec).unwrap().into(); - - assert_trees_eq(&expected, &cache); -} - -/// Provides detailed assertions when comparing merkle trees. 
-fn assert_trees_eq(a: &[u8], b: &[u8]) { - assert_eq!(a.len(), b.len(), "Byte lens different"); - for i in (0..a.len() / HASHSIZE).rev() { - let range = i * HASHSIZE..(i + 1) * HASHSIZE; - assert_eq!( - a[range.clone()], - b[range], - "Chunk {}/{} different \n\n a: {:?} \n\n b: {:?}", - i, - a.len() / HASHSIZE, - a, - b, - ); - } -} - -#[test] -fn vec_of_u64_builds() { - let data = join(vec![ - int_to_bytes8(1), - int_to_bytes8(2), - int_to_bytes8(3), - int_to_bytes8(4), - int_to_bytes8(5), - vec![0; 32 - 8], // padding - ]); - - let expected = merkleize(data); - - let my_vec = vec![1, 2, 3, 4, 5]; - - // - // Note: the length is not mixed-in in this example. The user must ensure the length is - // mixed-in. - // - - let cache: Vec = TreeHashCache::new(&my_vec).unwrap().into(); - - assert_eq!(expected, cache); -} - -#[test] -fn vec_does_mix_in_len() { - let data = join(vec![ - int_to_bytes8(1), - int_to_bytes8(2), - int_to_bytes8(3), - int_to_bytes8(4), - int_to_bytes8(5), - vec![0; 32 - 8], // padding - ]); - - let tree = merkleize(data); - - let my_vec: Vec = vec![1, 2, 3, 4, 5]; - - let mut expected = vec![0; 32]; - expected.copy_from_slice(&tree[0..HASHSIZE]); - expected.append(&mut int_to_bytes32(my_vec.len() as u64)); - let expected = hash(&expected); - - assert_eq!(&expected[0..HASHSIZE], &my_vec.tree_hash_root()[..]); -} - -#[test] -fn merkleize_odd() { - let data = join(vec![ - int_to_bytes32(1), - int_to_bytes32(2), - int_to_bytes32(3), - int_to_bytes32(4), - int_to_bytes32(5), - ]); - - let merkle = merkleize(sanitise_bytes(data)); - - let expected_len = num_nodes(8) * BYTES_PER_CHUNK; - - assert_eq!(merkle.len(), expected_len); -} - -fn generic_test(index: usize) { - let inner = Inner { - a: 1, - b: 2, - c: 3, - d: 4, - }; - - let cache: Vec = TreeHashCache::new(&inner).unwrap().into(); - - let changed_inner = match index { - 0 => Inner { - a: 42, - ..inner.clone() - }, - 1 => Inner { - b: 42, - ..inner.clone() - }, - 2 => Inner { - c: 42, - 
..inner.clone() - }, - 3 => Inner { - d: 42, - ..inner.clone() - }, - _ => panic!("bad index"), - }; - - let mut cache_struct = TreeHashCache::from_bytes(cache.clone(), false).unwrap(); - - changed_inner - .update_tree_hash_cache(&inner, &mut cache_struct, 0) - .unwrap(); - - // assert_eq!(*cache_struct.hash_count, 3); - - let new_tree_hash_cache: Vec = cache_struct.into(); - - let data1 = int_to_bytes32(1); - let data2 = int_to_bytes32(2); - let data3 = int_to_bytes32(3); - let data4 = int_to_bytes32(4); - - let mut data = vec![data1, data2, data3, data4]; - - data[index] = int_to_bytes32(42); - - let expected = merkleize(join(data)); - - assert_eq!(expected, new_tree_hash_cache); -} - -#[test] -fn cached_hash_on_inner() { - generic_test(0); - generic_test(1); - generic_test(2); - generic_test(3); -} - -#[test] -fn inner_builds() { - let data1 = int_to_bytes32(1); - let data2 = int_to_bytes32(2); - let data3 = int_to_bytes32(3); - let data4 = int_to_bytes32(4); - - let data = join(vec![data1, data2, data3, data4]); - let expected = merkleize(data); - - let inner = Inner { - a: 1, - b: 2, - c: 3, - d: 4, - }; - - let cache: Vec = TreeHashCache::new(&inner).unwrap().into(); - - assert_eq!(expected, cache); -} - -#[test] -fn merkleize_4_leaves() { - let data1 = hash(&int_to_bytes32(1)); - let data2 = hash(&int_to_bytes32(2)); - let data3 = hash(&int_to_bytes32(3)); - let data4 = hash(&int_to_bytes32(4)); - - let data = join(vec![ - data1.clone(), - data2.clone(), - data3.clone(), - data4.clone(), - ]); - - let cache = merkleize(data); - - let hash_12 = { - let mut joined = vec![]; - joined.append(&mut data1.clone()); - joined.append(&mut data2.clone()); - hash(&joined) - }; - let hash_34 = { - let mut joined = vec![]; - joined.append(&mut data3.clone()); - joined.append(&mut data4.clone()); - hash(&joined) - }; - let hash_hash12_hash_34 = { - let mut joined = vec![]; - joined.append(&mut hash_12.clone()); - joined.append(&mut hash_34.clone()); - hash(&joined) - }; - 
- for (i, chunk) in cache.chunks(HASHSIZE).enumerate().rev() { - let expected = match i { - 0 => hash_hash12_hash_34.clone(), - 1 => hash_12.clone(), - 2 => hash_34.clone(), - 3 => data1.clone(), - 4 => data2.clone(), - 5 => data3.clone(), - 6 => data4.clone(), - _ => vec![], - }; - - assert_eq!(chunk, &expected[..], "failed at {}", i); - } -} diff --git a/eth2/utils/tree_hash_derive/Cargo.toml b/eth2/utils/tree_hash_derive/Cargo.toml index f227d7954..8544108a7 100644 --- a/eth2/utils/tree_hash_derive/Cargo.toml +++ b/eth2/utils/tree_hash_derive/Cargo.toml @@ -10,6 +10,7 @@ proc-macro = true [dev-dependencies] tree_hash = { path = "../tree_hash" } +cached_tree_hash = { path = "../cached_tree_hash" } [dependencies] syn = "0.15" diff --git a/eth2/utils/tree_hash_derive/src/lib.rs b/eth2/utils/tree_hash_derive/src/lib.rs index 343287313..50727a89f 100644 --- a/eth2/utils/tree_hash_derive/src/lib.rs +++ b/eth2/utils/tree_hash_derive/src/lib.rs @@ -37,10 +37,10 @@ fn should_skip_hashing(field: &syn::Field) -> bool { .any(|attr| attr.into_token_stream().to_string() == "# [ tree_hash ( skip_hashing ) ]") } -/// Implements `tree_hash::CachedTreeHashSubTree` for some `struct`. +/// Implements `tree_hash::CachedTreeHash` for some `struct`. /// /// Fields are hashed in the order they are defined. -#[proc_macro_derive(CachedTreeHashSubTree, attributes(tree_hash))] +#[proc_macro_derive(CachedTreeHash, attributes(tree_hash))] pub fn subtree_derive(input: TokenStream) -> TokenStream { let item = parse_macro_input!(input as DeriveInput); @@ -54,56 +54,57 @@ pub fn subtree_derive(input: TokenStream) -> TokenStream { let idents_a = get_hashable_named_field_idents(&struct_data); let idents_b = idents_a.clone(); let idents_c = idents_a.clone(); - let idents_d = idents_a.clone(); let output = quote! 
{ - impl tree_hash::CachedTreeHashSubTree<#name> for #name { - fn new_tree_hash_cache(&self) -> Result { - let tree = tree_hash::TreeHashCache::from_leaves_and_subtrees( + impl cached_tree_hash::CachedTreeHash for #name { + fn new_tree_hash_cache(&self, depth: usize) -> Result { + let tree = cached_tree_hash::TreeHashCache::from_subtrees( self, vec![ #( - self.#idents_a.new_tree_hash_cache()?, + self.#idents_a.new_tree_hash_cache(depth)?, )* ], + depth )?; Ok(tree) } - fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { + fn num_tree_hash_cache_chunks(&self) -> usize { + cached_tree_hash::BTreeOverlay::new(self, 0, 0).num_chunks() + } + + fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema { let mut lengths = vec![]; #( - lengths.push(tree_hash::BTreeOverlay::new(&self.#idents_b, 0)?.total_nodes()); + lengths.push(self.#idents_b.num_tree_hash_cache_chunks()); )* - tree_hash::BTreeOverlay::from_lengths(chunk_offset, lengths) + cached_tree_hash::BTreeSchema::from_lengths(depth, lengths) } - fn update_tree_hash_cache( - &self, - other: &Self, - cache: &mut tree_hash::TreeHashCache, - chunk: usize, - ) -> Result { - let offset_handler = tree_hash::BTreeOverlay::new(self, chunk)?; + fn update_tree_hash_cache(&self, cache: &mut cached_tree_hash::TreeHashCache) -> Result<(), cached_tree_hash::Error> { + let overlay = cached_tree_hash::BTreeOverlay::new(self, cache.chunk_index, 0); - // Skip past the internal nodes and update any changed leaf nodes. - { - let chunk = offset_handler.first_leaf_node()?; - #( - let chunk = self.#idents_c.update_tree_hash_cache(&other.#idents_d, cache, chunk)?; - )* - } - for (&parent, children) in offset_handler.iter_internal_nodes().rev() { - if cache.either_modified(children)? { - cache.modify_chunk(parent, &cache.hash_children(children)?)?; - } - } + // Skip the chunk index to the first leaf node of this struct. 
+ cache.chunk_index = overlay.first_leaf_node(); + // Skip the overlay index to the first leaf node of this struct. + // cache.overlay_index += 1; - Ok(offset_handler.next_node) + // Recurse into the struct items, updating their caches. + #( + self.#idents_c.update_tree_hash_cache(cache)?; + )* + + // Iterate through the internal nodes, updating them if their children have changed. + cache.update_internal_nodes(&overlay)?; + + cache.chunk_index = overlay.next_node(); + + Ok(()) } } }; @@ -147,7 +148,7 @@ pub fn tree_hash_derive(input: TokenStream) -> TokenStream { leaves.append(&mut self.#idents.tree_hash_root()); )* - tree_hash::merkle_root(&leaves) + tree_hash::merkleize::merkle_root(&leaves) } } }; @@ -177,7 +178,7 @@ pub fn tree_hash_signed_root_derive(input: TokenStream) -> TokenStream { leaves.append(&mut self.#idents.tree_hash_root()); )* - tree_hash::merkle_root(&leaves) + tree_hash::merkleize::merkle_root(&leaves) } } }; diff --git a/eth2/utils/tree_hash_derive/tests/tests.rs b/eth2/utils/tree_hash_derive/tests/tests.rs index a7c74b23e..d4fd55165 100644 --- a/eth2/utils/tree_hash_derive/tests/tests.rs +++ b/eth2/utils/tree_hash_derive/tests/tests.rs @@ -1,7 +1,8 @@ -use tree_hash::{CachedTreeHashSubTree, SignedRoot, TreeHash}; -use tree_hash_derive::{CachedTreeHashSubTree, SignedRoot, TreeHash}; +use cached_tree_hash::{CachedTreeHash, TreeHashCache}; +use tree_hash::{merkleize::merkle_root, SignedRoot, TreeHash}; +use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash}; -#[derive(Clone, Debug, TreeHash, CachedTreeHashSubTree)] +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] pub struct Inner { pub a: u64, pub b: u64, @@ -9,22 +10,18 @@ pub struct Inner { pub d: u64, } -fn test_standard_and_cached(original: &T, modified: &T) -where - T: CachedTreeHashSubTree, -{ - let mut cache = original.new_tree_hash_cache().unwrap(); +fn test_standard_and_cached(original: &T, modified: &T) { + // let mut cache = original.new_tree_hash_cache().unwrap(); + let mut 
cache = TreeHashCache::new(original).unwrap(); let standard_root = original.tree_hash_root(); - let cached_root = cache.root().unwrap().to_vec(); + let cached_root = cache.tree_hash_root().unwrap(); assert_eq!(standard_root, cached_root); // Test after a modification - modified - .update_tree_hash_cache(&original, &mut cache, 0) - .unwrap(); + cache.update(modified).unwrap(); let standard_root = modified.tree_hash_root(); - let cached_root = cache.root().unwrap().to_vec(); + let cached_root = cache.tree_hash_root().unwrap(); assert_eq!(standard_root, cached_root); } @@ -44,7 +41,7 @@ fn inner_standard_vs_cached() { test_standard_and_cached(&original, &modified); } -#[derive(Clone, Debug, TreeHash, CachedTreeHashSubTree)] +#[derive(Clone, Debug, TreeHash, CachedTreeHash)] pub struct Uneven { pub a: u64, pub b: u64, @@ -120,7 +117,7 @@ impl CryptoKitties { leaves.append(&mut self.best_kitty.tree_hash_root()); leaves.append(&mut self.worst_kitty.tree_hash_root()); leaves.append(&mut self.kitties.tree_hash_root()); - tree_hash::merkle_root(&leaves) + merkle_root(&leaves) } } @@ -158,14 +155,14 @@ impl Casper { let mut list = Vec::new(); list.append(&mut self.friendly.tree_hash_root()); list.append(&mut self.friends.tree_hash_root()); - tree_hash::merkle_root(&list) + merkle_root(&list) } fn expected_tree_hash(&self) -> Vec { let mut list = Vec::new(); list.append(&mut self.friendly.tree_hash_root()); list.append(&mut self.dead.tree_hash_root()); - tree_hash::merkle_root(&list) + merkle_root(&list) } }