From 2c12aabf04f9411d0828296b4cae42ed476fc668 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 22 Apr 2019 09:20:13 +1000 Subject: [PATCH] Implement further cache tests and bug fixes --- eth2/utils/tree_hash/src/cached_tree_hash.rs | 90 +++++++----- .../src/cached_tree_hash/btree_overlay.rs | 129 ++++++++++++++++-- .../tree_hash/src/cached_tree_hash/impls.rs | 3 +- .../src/cached_tree_hash/impls/vec.rs | 23 +++- eth2/utils/tree_hash/tests/tests.rs | 98 ++++++++++--- eth2/utils/tree_hash_derive/src/lib.rs | 6 +- 6 files changed, 278 insertions(+), 71 deletions(-) diff --git a/eth2/utils/tree_hash/src/cached_tree_hash.rs b/eth2/utils/tree_hash/src/cached_tree_hash.rs index c2011fdf8..35ddf10cd 100644 --- a/eth2/utils/tree_hash/src/cached_tree_hash.rs +++ b/eth2/utils/tree_hash/src/cached_tree_hash.rs @@ -9,6 +9,44 @@ pub mod resize; pub use btree_overlay::BTreeOverlay; +#[derive(Debug, PartialEq)] +pub struct CachedTreeHasher { + cache: TreeHashCache, +} + +impl CachedTreeHasher { + pub fn new(item: &T) -> Result + where + T: CachedTreeHashSubTree, + { + Ok(Self { + cache: TreeHashCache::new(item)?, + }) + } + + pub fn update(&mut self, item: &T) -> Result<(), Error> + where + T: CachedTreeHashSubTree, + { + // Reset the per-hash counters. + self.cache.chunk_index = 0; + self.cache.overlay_index = 0; + + // Reset the "modified" flags for the cache. + self.cache.reset_modifications(); + + // Update the cache with the (maybe) changed object. + item.update_tree_hash_cache(&mut self.cache)?; + + Ok(()) + } + + pub fn tree_hash_root(&self) -> Result, Error> { + // Return the root of the cache -- the merkle root. + Ok(self.cache.root()?.to_vec()) + } +} + #[derive(Debug, PartialEq, Clone)] pub enum Error { ShouldNotProduceBTreeOverlay, @@ -141,20 +179,6 @@ impl TreeHashCache { item.new_tree_hash_cache() } - pub fn from_elems( - cache: Vec, - chunk_modified: Vec, - overlays: Vec, - ) -> Self { - Self { - cache, - chunk_modified, - overlays, - chunk_index: 0, - overlay_index: 0, - } - } - pub fn from_leaves_and_subtrees( item: &T, leaves_and_subtrees: Vec, @@ -185,7 +209,7 @@ impl TreeHashCache { // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then // concatenating their merkle trees. for t in leaves_and_subtrees { - leaves.append(&mut t.root().ok_or_else(|| Error::NoBytesForRoot)?.to_vec()); + leaves.append(&mut t.root()?.to_vec()); let (mut bytes, _bools, mut t_overlays) = t.into_components(); @@ -245,15 +269,19 @@ impl TreeHashCache { Ok(overlay) } + pub fn reset_modifications(&mut self) { + for chunk_modified in &mut self.chunk_modified { + *chunk_modified = false; + } + } + pub fn replace_overlay( &mut self, overlay_index: usize, + chunk_index: usize, new_overlay: BTreeOverlay, ) -> Result { - let old_overlay = self - .overlays - .get(overlay_index) - .ok_or_else(|| Error::NoOverlayForIndex(overlay_index))?; + let old_overlay = self.get_overlay(overlay_index, chunk_index)?; // Get slices of the exsiting tree from the cache. let (old_bytes, old_flags) = self @@ -291,8 +319,6 @@ impl TreeHashCache { pub fn update_internal_nodes(&mut self, overlay: &BTreeOverlay) -> Result<(), Error> { for (parent, children) in overlay.internal_parents_and_children().into_iter().rev() { - dbg!(parent); - dbg!(&children); if self.either_modified(children)? { self.modify_chunk(parent, &self.hash_children(children)?)?; } @@ -301,15 +327,17 @@ impl TreeHashCache { Ok(()) } - pub fn bytes_len(&self) -> usize { + fn bytes_len(&self) -> usize { self.cache.len() } - pub fn root(&self) -> Option<&[u8]> { - self.cache.get(0..HASHSIZE) + pub fn root(&self) -> Result<&[u8], Error> { + self.cache + .get(0..HASHSIZE) + .ok_or_else(|| Error::NoBytesForRoot) } - pub fn splice(&mut self, chunk_range: Range, bytes: Vec, bools: Vec) { + fn splice(&mut self, chunk_range: Range, bytes: Vec, bools: Vec) { // Update the `chunk_modified` vec, marking all spliced-in nodes as changed. self.chunk_modified.splice(chunk_range.clone(), bools); self.cache @@ -331,14 +359,14 @@ impl TreeHashCache { Ok(()) } - pub fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { + fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { Some(( self.cache.get(node_range_to_byte_range(&chunk_range))?, self.chunk_modified.get(chunk_range)?, )) } - pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { + fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { let start = chunk * BYTES_PER_CHUNK; let end = start + BYTES_PER_CHUNK; @@ -352,7 +380,7 @@ impl TreeHashCache { Ok(()) } - pub fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { + fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { let start = chunk * BYTES_PER_CHUNK; let end = start + BYTES_PER_CHUNK; @@ -362,7 +390,7 @@ impl TreeHashCache { .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?) } - pub fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { + fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { Ok(self.get_chunk(chunk)? == other) } @@ -373,11 +401,11 @@ impl TreeHashCache { .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk)) } - pub fn either_modified(&self, children: (usize, usize)) -> Result { + fn either_modified(&self, children: (usize, usize)) -> Result { Ok(self.changed(children.0)? | self.changed(children.1)?) } - pub fn hash_children(&self, children: (usize, usize)) -> Result, Error> { + fn hash_children(&self, children: (usize, usize)) -> Result, Error> { let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2); child_bytes.append(&mut self.get_chunk(children.0)?.to_vec()); child_bytes.append(&mut self.get_chunk(children.1)?.to_vec()); diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs b/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs index 58c844867..cfd58df8a 100644 --- a/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs +++ b/eth2/utils/tree_hash/src/cached_tree_hash/btree_overlay.rs @@ -3,7 +3,8 @@ use super::*; #[derive(Debug, PartialEq, Clone)] pub struct BTreeOverlay { pub offset: usize, - lengths: Vec, + pub num_items: usize, + pub lengths: Vec, } impl BTreeOverlay { @@ -14,11 +15,19 @@ impl BTreeOverlay { item.tree_hash_cache_overlay(initial_offset) } - pub fn from_lengths(offset: usize, lengths: Vec) -> Result { + pub fn from_lengths( + offset: usize, + num_items: usize, + lengths: Vec, + ) -> Result { if lengths.is_empty() { Err(Error::TreeCannotHaveZeroNodes) } else { - Ok(Self { offset, lengths }) + Ok(Self { + offset, + num_items, + lengths, + }) } } @@ -47,7 +56,8 @@ impl BTreeOverlay { } pub fn next_node(&self) -> usize { - self.first_node() + self.lengths.iter().sum::() + self.first_node() + self.num_internal_nodes() + self.num_leaf_nodes() - self.lengths.len() + + self.lengths.iter().sum::() } pub fn height(&self) -> usize { @@ -78,17 +88,28 @@ impl BTreeOverlay { } } - /// Returns an iterator visiting each internal node, providing the left and right child chunks - /// for the node. + pub fn child_chunks(&self, parent: usize) -> (usize, usize) { + let children = children(parent); + + if children.1 < self.num_internal_nodes() { + (children.0 + self.offset, children.1 + self.offset) + } else { + let chunks = self.n_leaf_node_chunks(children.1); + (chunks[chunks.len() - 2], chunks[chunks.len() - 1]) + } + } + + /// (parent, (left_child, right_child)) pub fn internal_parents_and_children(&self) -> Vec<(usize, (usize, usize))> { + let mut chunks = Vec::with_capacity(self.num_nodes()); + chunks.append(&mut self.internal_node_chunks()); + chunks.append(&mut self.leaf_node_chunks()); + (0..self.num_internal_nodes()) .into_iter() .map(|parent| { let children = children(parent); - ( - parent + self.offset, - (children.0 + self.offset, children.1 + self.offset), - ) + (chunks[parent], (chunks[children.0], chunks[children.1])) }) .collect() } @@ -97,4 +118,92 @@ impl BTreeOverlay { pub fn internal_node_chunks(&self) -> Vec { (self.offset..self.offset + self.num_internal_nodes()).collect() } + + // Returns a `Vec` of the first chunk index for each leaf node of the tree. + pub fn leaf_node_chunks(&self) -> Vec { + self.n_leaf_node_chunks(self.num_leaf_nodes()) + } + + // Returns a `Vec` of the first chunk index for the first `n` leaf nodes of the tree. + fn n_leaf_node_chunks(&self, n: usize) -> Vec { + let mut chunks = Vec::with_capacity(n); + + let mut chunk = self.offset + self.num_internal_nodes(); + for i in 0..n { + chunks.push(chunk); + + match self.lengths.get(i) { + Some(len) => { + chunk += len; + } + None => chunk += 1, + } + } + + chunks + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn get_tree_a(n: usize) -> BTreeOverlay { + BTreeOverlay::from_lengths(0, n, vec![1; n]).unwrap() + } + + #[test] + fn leaf_node_chunks() { + let tree = get_tree_a(4); + + assert_eq!(tree.leaf_node_chunks(), vec![3, 4, 5, 6]) + } + + #[test] + fn internal_node_chunks() { + let tree = get_tree_a(4); + + assert_eq!(tree.internal_node_chunks(), vec![0, 1, 2]) + } + + #[test] + fn internal_parents_and_children() { + let tree = get_tree_a(4); + + assert_eq!( + tree.internal_parents_and_children(), + vec![(0, (1, 2)), (1, (3, 4)), (2, (5, 6))] + ) + } + + #[test] + fn chunk_range() { + let tree = get_tree_a(4); + assert_eq!(tree.chunk_range(), 0..7); + + let tree = get_tree_a(1); + assert_eq!(tree.chunk_range(), 0..1); + + let tree = get_tree_a(2); + assert_eq!(tree.chunk_range(), 0..3); + + let tree = BTreeOverlay::from_lengths(11, 4, vec![1, 1]).unwrap(); + assert_eq!(tree.chunk_range(), 11..14); + } + + #[test] + fn root_of_one_node() { + let tree = get_tree_a(1); + + assert_eq!(tree.root(), 0); + assert_eq!(tree.num_internal_nodes(), 0); + assert_eq!(tree.num_leaf_nodes(), 1); + } + + #[test] + fn child_chunks() { + let tree = get_tree_a(4); + + assert_eq!(tree.child_chunks(0), (1, 2)) + } } diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs b/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs index 4a05a4046..ece025ccd 100644 --- a/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs +++ b/eth2/utils/tree_hash/src/cached_tree_hash/impls.rs @@ -1,4 +1,3 @@ -use super::resize::{grow_merkle_cache, shrink_merkle_cache}; use super::*; mod vec; @@ -13,7 +12,7 @@ impl CachedTreeHashSubTree for u64 { } fn tree_hash_cache_overlay(&self, chunk_offset: usize) -> Result { - BTreeOverlay::from_lengths(chunk_offset, vec![1]) + BTreeOverlay::from_lengths(chunk_offset, 1, vec![1]) } fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { diff --git a/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs b/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs index df49d7a97..d71b58816 100644 --- a/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs +++ b/eth2/utils/tree_hash/src/cached_tree_hash/impls/vec.rs @@ -37,7 +37,8 @@ where let num_leaves = (self.len() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(); - vec![1; num_leaves] + // Disallow zero-length as an empty list still has one all-padding node. + vec![1; std::cmp::max(1, num_leaves)] } TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => { let mut lengths = vec![]; @@ -50,7 +51,7 @@ where } }; - BTreeOverlay::from_lengths(chunk_offset, lengths) + BTreeOverlay::from_lengths(chunk_offset, self.len(), lengths) } fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { @@ -65,7 +66,7 @@ where // This grows/shrinks the bytes to accomodate the new tree, preserving as much of the tree // as possible. if new_overlay.num_leaf_nodes() != old_overlay.num_leaf_nodes() { - cache.replace_overlay(cache.overlay_index, new_overlay.clone())?; + cache.replace_overlay(cache.overlay_index, cache.chunk_index, new_overlay.clone())?; } match T::tree_hash_type() { @@ -132,10 +133,20 @@ where cache.update_internal_nodes(&new_overlay)?; - // Always update the root node as we don't have a reliable check to know if the list len - // has changed. + // Mix in length. let root_node = new_overlay.root(); - cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?; + if cache.changed(root_node)? { + dbg!(cache.get_chunk(12)); + cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?; + } else if old_overlay.num_items != new_overlay.num_items { + if new_overlay.num_internal_nodes() == 0 { + cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?; + } else { + let children = new_overlay.child_chunks(0); + cache.modify_chunk(root_node, &cache.hash_children(children)?)?; + cache.modify_chunk(root_node, &cache.mix_in_length(root_node, self.len())?)?; + } + } cache.chunk_index = new_overlay.next_node(); diff --git a/eth2/utils/tree_hash/tests/tests.rs b/eth2/utils/tree_hash/tests/tests.rs index c3bd9a7b9..52f7a7cb1 100644 --- a/eth2/utils/tree_hash/tests/tests.rs +++ b/eth2/utils/tree_hash/tests/tests.rs @@ -5,13 +5,13 @@ use tree_hash::*; use tree_hash_derive::{CachedTreeHashSubTree, TreeHash}; #[derive(Clone, Debug, TreeHash, CachedTreeHashSubTree)] -pub struct Nested { +pub struct NestedStruct { pub a: u64, pub b: Inner, } #[derive(Clone, Debug, TreeHash, CachedTreeHashSubTree)] -pub struct Thing { +pub struct StructWithVec { pub a: u64, pub b: Inner, pub c: Vec, @@ -21,24 +21,32 @@ fn test_routine(original: T, modified: Vec) where T: CachedTreeHashSubTree, { - let mut cache = original.new_tree_hash_cache().unwrap(); + let mut hasher = CachedTreeHasher::new(&original).unwrap(); let standard_root = original.tree_hash_root(); - let cached_root = cache.root().unwrap().to_vec(); + let cached_root = hasher.tree_hash_root().unwrap(); assert_eq!(standard_root, cached_root, "Initial cache build failed."); for (i, modified) in modified.iter().enumerate() { // Test after a modification - modified.update_tree_hash_cache(&mut cache).unwrap(); + hasher + .update(modified) + .expect(&format!("Modification {}", i)); let standard_root = modified.tree_hash_root(); - let cached_root = cache.root().unwrap().to_vec(); - assert_eq!(standard_root, cached_root, "Modification {} failed.", i); + let cached_root = hasher + .tree_hash_root() + .expect(&format!("Modification {}", i)); + assert_eq!( + standard_root, cached_root, + "Modification {} failed. \n Cache: {:?}", + i, hasher + ); } } #[test] -fn test_nested() { - let original = Nested { +fn test_nested_struct() { + let original = NestedStruct { a: 42, b: Inner { a: 12, @@ -47,7 +55,7 @@ fn test_nested() { d: 15, }, }; - let modified = vec![Nested { + let modified = vec![NestedStruct { a: 99, ..original.clone() }]; @@ -73,8 +81,8 @@ fn test_inner() { } #[test] -fn test_thing() { - let original = Thing { +fn test_struct_with_vec() { + let original = StructWithVec { a: 42, b: Inner { a: 12, @@ -85,10 +93,51 @@ fn test_thing() { c: vec![1, 2, 3, 4, 5], }; - let modified = vec![Thing { - a: 99, - ..original.clone() - }]; + let modified = vec![ + StructWithVec { + a: 99, + ..original.clone() + }, + StructWithVec { + a: 100, + ..original.clone() + }, + StructWithVec { + c: vec![1, 2, 3, 4, 5], + ..original.clone() + }, + StructWithVec { + c: vec![1, 3, 4, 5, 6], + ..original.clone() + }, + StructWithVec { + c: vec![1, 3, 4, 5, 6, 7, 8, 9], + ..original.clone() + }, + StructWithVec { + c: vec![1, 3, 4, 5], + ..original.clone() + }, + StructWithVec { + b: Inner { + a: u64::max_value(), + b: u64::max_value(), + c: u64::max_value(), + d: u64::max_value(), + }, + c: vec![], + ..original.clone() + }, + StructWithVec { + b: Inner { + a: 0, + b: 1, + c: 2, + d: 3, + }, + ..original.clone() + }, + ]; test_routine(original, modified); } @@ -97,7 +146,17 @@ fn test_thing() { fn test_vec() { let original = vec![1, 2, 3, 4, 5]; - let modified = vec![vec![1, 2, 3, 4, 42]]; + let modified = vec![ + vec![1, 2, 3, 4, 42], + vec![1, 2, 3, 4], + vec![], + vec![42; 2_usize.pow(4)], + vec![], + vec![], + vec![1, 2, 3, 4, 42], + vec![1, 2, 3], + vec![1], + ]; test_routine(original, modified); } @@ -158,12 +217,11 @@ impl CachedTreeHashSubTree for Inner { lengths.push(BTreeOverlay::new(&self.c, 0)?.num_nodes()); lengths.push(BTreeOverlay::new(&self.d, 0)?.num_nodes()); - BTreeOverlay::from_lengths(chunk_offset, lengths) + BTreeOverlay::from_lengths(chunk_offset, 4, lengths) } fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { - let overlay = cache.get_overlay(cache.overlay_index, cache.chunk_index)?; - dbg!(&overlay); + let overlay = BTreeOverlay::new(self, cache.chunk_index)?; // Skip the chunk index to the first leaf node of this struct. cache.chunk_index = overlay.first_leaf_node(); diff --git a/eth2/utils/tree_hash_derive/src/lib.rs b/eth2/utils/tree_hash_derive/src/lib.rs index b308953a3..d0a9e7a4e 100644 --- a/eth2/utils/tree_hash_derive/src/lib.rs +++ b/eth2/utils/tree_hash_derive/src/lib.rs @@ -55,6 +55,8 @@ pub fn subtree_derive(input: TokenStream) -> TokenStream { let idents_b = idents_a.clone(); let idents_c = idents_a.clone(); + let num_items = idents_a.len(); + let output = quote! { impl tree_hash::CachedTreeHashSubTree<#name> for #name { fn new_tree_hash_cache(&self) -> Result { @@ -77,11 +79,11 @@ pub fn subtree_derive(input: TokenStream) -> TokenStream { lengths.push(tree_hash::BTreeOverlay::new(&self.#idents_b, 0)?.num_nodes()); )* - tree_hash::BTreeOverlay::from_lengths(chunk_offset, lengths) + tree_hash::BTreeOverlay::from_lengths(chunk_offset, #num_items, lengths) } fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error> { - let overlay = cache.get_overlay(cache.overlay_index, cache.chunk_index)?; + let overlay = BTreeOverlay::new(self, cache.chunk_index)?; // Skip the chunk index to the first leaf node of this struct. cache.chunk_index = overlay.first_leaf_node();