From 93f3fc858d97791563ab52eb1a1dea78b8f1ec46 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 16 Apr 2019 09:14:33 +1000 Subject: [PATCH] Add uncached tree hashing --- eth2/utils/tree_hash/src/cached_tree_hash.rs | 125 ++++++++++++++++ .../{ => cached_tree_hash}/btree_overlay.rs | 0 .../src/{ => cached_tree_hash}/impls.rs | 0 .../src/{ => cached_tree_hash}/impls/vec.rs | 0 .../src/{ => cached_tree_hash}/resize.rs | 0 eth2/utils/tree_hash/src/lib.rs | 134 +----------------- .../utils/tree_hash/src/standard_tree_hash.rs | 114 +++++++++++++++ eth2/utils/tree_hash/tests/tests.rs | 25 ++++ 8 files changed, 268 insertions(+), 130 deletions(-) rename eth2/utils/tree_hash/src/{ => cached_tree_hash}/btree_overlay.rs (100%) rename eth2/utils/tree_hash/src/{ => cached_tree_hash}/impls.rs (100%) rename eth2/utils/tree_hash/src/{ => cached_tree_hash}/impls/vec.rs (100%) rename eth2/utils/tree_hash/src/{ => cached_tree_hash}/resize.rs (100%) create mode 100644 eth2/utils/tree_hash/src/standard_tree_hash.rs diff --git a/eth2/utils/tree_hash/src/cached_tree_hash.rs b/eth2/utils/tree_hash/src/cached_tree_hash.rs index 048d4bab5..fc12cfbba 100644 --- a/eth2/utils/tree_hash/src/cached_tree_hash.rs +++ b/eth2/utils/tree_hash/src/cached_tree_hash.rs @@ -1,4 +1,129 @@ use super::*; +use hashing::hash; +use int_to_bytes::int_to_bytes32; +use std::ops::Range; + +pub mod btree_overlay; +pub mod impls; +pub mod resize; + +pub use btree_overlay::BTreeOverlay; + +#[derive(Debug, PartialEq, Clone)] +pub enum Error { + ShouldNotProduceBTreeOverlay, + NoFirstNode, + NoBytesForRoot, + UnableToObtainSlices, + UnableToGrowMerkleTree, + UnableToShrinkMerkleTree, + ShouldNeverBePacked(ItemType), + BytesAreNotEvenChunks(usize), + NoModifiedFieldForChunk(usize), + NoBytesForChunk(usize), +} + +pub trait CachedTreeHash: CachedTreeHashSubTree + Sized { + fn update_internal_tree_hash_cache(self, old: T) -> Result<(Self, Self), Error>; + + fn cached_tree_hash_root(&self) -> Option>; + + fn clone_without_tree_hash_cache(&self) -> Self; +} + +pub trait CachedTreeHashSubTree { + fn item_type() -> ItemType; + + fn btree_overlay(&self, chunk_offset: usize) -> Result; + + fn packed_encoding(&self) -> Result, Error>; + + fn packing_factor() -> usize; + + fn new_cache(&self) -> Result; + + fn update_cache( + &self, + other: &Item, + cache: &mut TreeHashCache, + chunk: usize, + ) -> Result; +} + +fn children(parent: usize) -> (usize, usize) { + ((2 * parent + 1), (2 * parent + 2)) +} + +fn node_range_to_byte_range(node_range: &Range) -> Range { + node_range.start * HASHSIZE..node_range.end * HASHSIZE +} + +/// Split `values` into a power-of-two, identical-length chunks (padding with `0`) and merkleize +/// them, returning the entire merkle tree. +/// +/// The root hash is `merkleize(values)[0..BYTES_PER_CHUNK]`. +pub fn merkleize(values: Vec) -> Vec { + let values = sanitise_bytes(values); + + let leaves = values.len() / HASHSIZE; + + if leaves == 0 { + panic!("No full leaves"); + } + + if !leaves.is_power_of_two() { + panic!("leaves is not power of two"); + } + + let mut o: Vec = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE]; + o.append(&mut values.to_vec()); + + let mut i = o.len(); + let mut j = o.len() - values.len(); + + while i >= MERKLE_HASH_CHUNCK { + i -= MERKLE_HASH_CHUNCK; + let hash = hash(&o[i..i + MERKLE_HASH_CHUNCK]); + + j -= HASHSIZE; + o[j..j + HASHSIZE].copy_from_slice(&hash); + } + + o +} + +pub fn sanitise_bytes(mut bytes: Vec) -> Vec { + let present_leaves = num_unsanitized_leaves(bytes.len()); + let required_leaves = present_leaves.next_power_of_two(); + + if (present_leaves != required_leaves) | last_leaf_needs_padding(bytes.len()) { + bytes.resize(num_bytes(required_leaves), 0); + } + + bytes +} + +fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { + let required_leaves = num_leaves.next_power_of_two(); + + bytes.resize( + bytes.len() + (required_leaves - num_leaves) * BYTES_PER_CHUNK, + 0, + ); +} + +fn last_leaf_needs_padding(num_bytes: usize) -> bool { + num_bytes % HASHSIZE != 0 +} + +/// Rounds up +fn num_unsanitized_leaves(num_bytes: usize) -> usize { + (num_bytes + HASHSIZE - 1) / HASHSIZE +} + +fn num_bytes(num_leaves: usize) -> usize { + num_leaves * HASHSIZE +} #[derive(Debug, PartialEq, Clone)] pub struct TreeHashCache { diff --git a/eth2/utils/tree_hash/src/btree_overlay.rs b/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs similarity index 100% rename from eth2/utils/tree_hash/src/btree_overlay.rs rename to eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs diff --git a/eth2/utils/tree_hash/src/impls.rs b/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs similarity index 100% rename from eth2/utils/tree_hash/src/impls.rs rename to eth2/utils/tree_hash/src/cached_tree_hash/impls.rs diff --git a/eth2/utils/tree_hash/src/impls/vec.rs b/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs similarity index 100% rename from eth2/utils/tree_hash/src/impls/vec.rs rename to eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs diff --git a/eth2/utils/tree_hash/src/resize.rs b/eth2/utils/tree_hash/src/cached_tree_hash/resize.rs similarity index 100% rename from eth2/utils/tree_hash/src/resize.rs rename to eth2/utils/tree_hash/src/cached_tree_hash/resize.rs diff --git a/eth2/utils/tree_hash/src/lib.rs b/eth2/utils/tree_hash/src/lib.rs index 5ec2b0283..4e5302bca 100644 --- a/eth2/utils/tree_hash/src/lib.rs +++ b/eth2/utils/tree_hash/src/lib.rs @@ -1,33 +1,10 @@ -use hashing::hash; -use int_to_bytes::int_to_bytes32; -use std::ops::Range; - -mod btree_overlay; -mod cached_tree_hash; -mod impls; -mod resize; - -pub use btree_overlay::BTreeOverlay; -pub use cached_tree_hash::TreeHashCache; +pub mod cached_tree_hash; +pub mod standard_tree_hash; pub const BYTES_PER_CHUNK: usize = 32; pub const HASHSIZE: usize = 32; pub const MERKLE_HASH_CHUNCK: usize = 2 * BYTES_PER_CHUNK; -#[derive(Debug, PartialEq, Clone)] -pub enum Error { - ShouldNotProduceBTreeOverlay, - NoFirstNode, - NoBytesForRoot, - UnableToObtainSlices, - UnableToGrowMerkleTree, - UnableToShrinkMerkleTree, - ShouldNeverBePacked(ItemType), - BytesAreNotEvenChunks(usize), - NoModifiedFieldForChunk(usize), - NoBytesForChunk(usize), -} - #[derive(Debug, PartialEq, Clone)] pub enum ItemType { Basic, @@ -35,114 +12,11 @@ pub enum ItemType { Composite, } -pub trait CachedTreeHash: CachedTreeHashSubTree + Sized { - fn update_internal_tree_hash_cache(self, old: T) -> Result<(Self, Self), Error>; - - fn cached_tree_hash_root(&self) -> Option>; - - fn clone_without_tree_hash_cache(&self) -> Self; -} - -pub trait CachedTreeHashSubTree { - fn item_type() -> ItemType; - - fn btree_overlay(&self, chunk_offset: usize) -> Result; - - fn packed_encoding(&self) -> Result, Error>; - - fn packing_factor() -> usize; - - fn new_cache(&self) -> Result; - - fn update_cache( - &self, - other: &Item, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result; -} - -fn children(parent: usize) -> (usize, usize) { - ((2 * parent + 1), (2 * parent + 2)) -} - -fn num_nodes(num_leaves: usize) -> usize { - 2 * num_leaves - 1 -} - -fn node_range_to_byte_range(node_range: &Range) -> Range { - node_range.start * HASHSIZE..node_range.end * HASHSIZE -} - -/// Split `values` into a power-of-two, identical-length chunks (padding with `0`) and merkleize -/// them, returning the entire merkle tree. -/// -/// The root hash is `merkleize(values)[0..BYTES_PER_CHUNK]`. -pub fn merkleize(values: Vec) -> Vec { - let values = sanitise_bytes(values); - - let leaves = values.len() / HASHSIZE; - - if leaves == 0 { - panic!("No full leaves"); - } - - if !leaves.is_power_of_two() { - panic!("leaves is not power of two"); - } - - let mut o: Vec = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE]; - o.append(&mut values.to_vec()); - - let mut i = o.len(); - let mut j = o.len() - values.len(); - - while i >= MERKLE_HASH_CHUNCK { - i -= MERKLE_HASH_CHUNCK; - let hash = hash(&o[i..i + MERKLE_HASH_CHUNCK]); - - j -= HASHSIZE; - o[j..j + HASHSIZE].copy_from_slice(&hash); - } - - o -} - -pub fn sanitise_bytes(mut bytes: Vec) -> Vec { - let present_leaves = num_unsanitized_leaves(bytes.len()); - let required_leaves = present_leaves.next_power_of_two(); - - if (present_leaves != required_leaves) | last_leaf_needs_padding(bytes.len()) { - bytes.resize(num_bytes(required_leaves), 0); - } - - bytes -} - -fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { - let required_leaves = num_leaves.next_power_of_two(); - - bytes.resize( - bytes.len() + (required_leaves - num_leaves) * BYTES_PER_CHUNK, - 0, - ); -} - -fn last_leaf_needs_padding(num_bytes: usize) -> bool { - num_bytes % HASHSIZE != 0 -} - -/// Rounds up -fn num_unsanitized_leaves(num_bytes: usize) -> usize { - (num_bytes + HASHSIZE - 1) / HASHSIZE -} - -/// Rounds up fn num_sanitized_leaves(num_bytes: usize) -> usize { let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE; leaves.next_power_of_two() } -fn num_bytes(num_leaves: usize) -> usize { - num_leaves * HASHSIZE +fn num_nodes(num_leaves: usize) -> usize { + 2 * num_leaves - 1 } diff --git a/eth2/utils/tree_hash/src/standard_tree_hash.rs b/eth2/utils/tree_hash/src/standard_tree_hash.rs new file mode 100644 index 000000000..c8119a790 --- /dev/null +++ b/eth2/utils/tree_hash/src/standard_tree_hash.rs @@ -0,0 +1,114 @@ +use super::*; +use hashing::hash; +use int_to_bytes::int_to_bytes32; +use ssz::ssz_encode; + +pub trait TreeHash { + fn tree_hash_item_type() -> ItemType; + + fn tree_hash_packed_encoding(&self) -> Vec; + + fn hash_tree_root(&self) -> Vec; +} + +impl TreeHash for u64 { + fn tree_hash_item_type() -> ItemType { + ItemType::Basic + } + + fn tree_hash_packed_encoding(&self) -> Vec { + ssz_encode(self) + } + + fn hash_tree_root(&self) -> Vec { + int_to_bytes32(*self) + } +} + +impl TreeHash for Vec +where + T: TreeHash, +{ + fn tree_hash_item_type() -> ItemType { + ItemType::List + } + + fn tree_hash_packed_encoding(&self) -> Vec { + unreachable!("List should never be packed.") + } + + fn hash_tree_root(&self) -> Vec { + let leaves = match T::tree_hash_item_type() { + ItemType::Basic => { + let mut leaves = vec![]; + + for item in self { + leaves.append(&mut item.tree_hash_packed_encoding()); + } + + leaves + } + ItemType::Composite | ItemType::List => { + let mut leaves = Vec::with_capacity(self.len() * HASHSIZE); + + for item in self { + leaves.append(&mut item.hash_tree_root()) + } + + leaves + } + }; + + // Mix in the length + let mut root_and_len = Vec::with_capacity(HASHSIZE * 2); + root_and_len.append(&mut efficient_merkleize(&leaves)[0..32].to_vec()); + root_and_len.append(&mut int_to_bytes32(self.len() as u64)); + + hash(&root_and_len) + } +} + +pub fn efficient_merkleize(bytes: &[u8]) -> Vec { + let leaves = num_sanitized_leaves(bytes.len()); + let nodes = num_nodes(leaves); + let internal_nodes = nodes - leaves; + + let num_bytes = internal_nodes * HASHSIZE + bytes.len(); + + let mut o: Vec = vec![0; internal_nodes * HASHSIZE]; + o.append(&mut bytes.to_vec()); + + assert_eq!(o.len(), num_bytes); + + let empty_chunk_hash = hash(&[0; MERKLE_HASH_CHUNCK]); + + let mut i = nodes * HASHSIZE; + let mut j = internal_nodes * HASHSIZE; + + while i >= MERKLE_HASH_CHUNCK { + i -= MERKLE_HASH_CHUNCK; + + j -= HASHSIZE; + let hash = match o.get(i..i + MERKLE_HASH_CHUNCK) { + // All bytes are available, hash as ususal. + Some(slice) => hash(slice), + // Unable to get all the bytes. + None => { + match o.get(i..) { + // Able to get some of the bytes, pad them out. + Some(slice) => { + let mut bytes = slice.to_vec(); + bytes.resize(MERKLE_HASH_CHUNCK, 0); + hash(&bytes) + } + // Unable to get any bytes, use the empty-chunk hash. + None => empty_chunk_hash.clone(), + } + } + }; + + o[j..j + HASHSIZE].copy_from_slice(&hash); + } + + o +} diff --git a/eth2/utils/tree_hash/tests/tests.rs b/eth2/utils/tree_hash/tests/tests.rs index ead6d8c00..d65192cd5 100644 --- a/eth2/utils/tree_hash/tests/tests.rs +++ b/eth2/utils/tree_hash/tests/tests.rs @@ -1,5 +1,7 @@ use hashing::hash; use int_to_bytes::{int_to_bytes32, int_to_bytes8}; +use tree_hash::cached_tree_hash::*; +use tree_hash::standard_tree_hash::*; use tree_hash::*; #[derive(Clone, Debug)] @@ -131,6 +133,27 @@ pub struct Inner { pub d: u64, } +impl TreeHash for Inner { + fn tree_hash_item_type() -> ItemType { + ItemType::Composite + } + + fn tree_hash_packed_encoding(&self) -> Vec { + unreachable!("Struct should never be packed.") + } + + fn hash_tree_root(&self) -> Vec { + let mut leaves = Vec::with_capacity(4 * HASHSIZE); + + leaves.append(&mut self.a.hash_tree_root()); + leaves.append(&mut self.b.hash_tree_root()); + leaves.append(&mut self.c.hash_tree_root()); + leaves.append(&mut self.d.hash_tree_root()); + + efficient_merkleize(&leaves)[0..32].to_vec() + } +} + impl CachedTreeHashSubTree for Inner { fn item_type() -> ItemType { ItemType::Composite @@ -458,6 +481,7 @@ fn test_u64_vec_modifications(original: Vec, modified: Vec) { mix_in_length(&mut expected[0..HASHSIZE], modified.len()); assert_eq!(expected, modified_cache); + assert_eq!(&expected[0..32], &modified.hash_tree_root()[..]); } #[test] @@ -580,6 +604,7 @@ fn test_inner_vec_modifications(original: Vec, modified: Vec, refe // Compare the cached tree to the reference tree. assert_trees_eq(&expected, &modified_cache); + assert_eq!(&expected[0..32], &modified.hash_tree_root()[..]); } #[test]