Fix bug with cached tree hash, passes tests

This commit is contained in:
Paul Hauner 2019-04-27 16:22:42 +10:00
parent b86e118062
commit 80fa5d08c5
No known key found for this signature in database
GPG Key ID: 303E4494BB28068C
6 changed files with 243 additions and 183 deletions

View File

@ -56,11 +56,6 @@ macro_rules! cached_tree_hash_tests {
// Test the updated hash
let modified = $type::random_for_test(&mut rng);
hasher.update(&modified).unwrap();
dbg!(&hasher.cache.chunk_modified);
dbg!(hasher.cache.chunk_modified.len());
dbg!(hasher.cache.chunk_index);
dbg!(hasher.cache.schemas.len());
dbg!(hasher.cache.schema_index);
assert_eq!(
hasher.tree_hash_root().unwrap(),
modified.tree_hash_root(),

View File

@ -29,6 +29,13 @@ impl Into<BTreeSchema> for BTreeOverlay {
}
}
/// Describes the state of a leaf-node position in a `BTreeOverlay`, as returned by
/// `get_leaf_node`.
#[derive(Debug, PartialEq, Clone)]
pub enum LeafNode {
/// The queried index lies beyond the bounds of the tree.
DoesNotExist,
/// The leaf holds an item occupying the given chunk range.
Exists(Range<usize>),
/// The leaf is within the tree but holds no item (padding).
Padding,
}
#[derive(Debug, PartialEq, Clone)]
pub struct BTreeOverlay {
offset: usize,
@ -82,6 +89,10 @@ impl BTreeOverlay {
self.num_leaf_nodes().trailing_zeros() as usize
}
/// Returns the chunk range covering only the internal (non-leaf) nodes of this tree.
///
/// Internal nodes occupy the first `num_internal_nodes()` chunks starting at `offset`
/// (leaves follow them in the flattened layout).
pub fn internal_chunk_range(&self) -> Range<usize> {
self.offset..self.offset + self.num_internal_nodes()
}
/// Returns the chunk range covering the entire tree: from its first node up to (but not
/// including) the first node past the tree.
pub fn chunk_range(&self) -> Range<usize> {
self.first_node()..self.next_node()
}
@ -104,13 +115,15 @@ impl BTreeOverlay {
/// - The specified node is internal.
/// - The specified node is padding.
/// - The specified node is OOB of the tree.
pub fn get_leaf_node(&self, i: usize) -> Result<Option<Range<usize>>, Error> {
if i >= self.num_nodes() - self.num_padding_leaves() {
Ok(None)
pub fn get_leaf_node(&self, i: usize) -> Result<LeafNode, Error> {
if i >= self.num_nodes() {
Ok(LeafNode::DoesNotExist)
} else if i >= self.num_nodes() - self.num_padding_leaves() {
Ok(LeafNode::Padding)
} else if (i == self.num_internal_nodes()) && (self.lengths.len() == 0) {
// If this is the first leaf node and the overlay contains zero items, return
// `LeafNode::Padding` as this node must be padding.
Ok(None)
Ok(LeafNode::Padding)
} else {
let i = i - self.num_internal_nodes();
@ -119,7 +132,7 @@ impl BTreeOverlay {
+ self.lengths.iter().take(i).sum::<usize>();
let last_node = first_node + self.lengths[i];
Ok(Some(first_node..last_node))
Ok(LeafNode::Exists(first_node..last_node))
}
}
@ -237,10 +250,28 @@ mod test {
fn get_leaf_node() {
let tree = get_tree_a(4);
assert_eq!(tree.get_leaf_node(3), Ok(Some(3..4)));
assert_eq!(tree.get_leaf_node(4), Ok(Some(4..5)));
assert_eq!(tree.get_leaf_node(5), Ok(Some(5..6)));
assert_eq!(tree.get_leaf_node(6), Ok(Some(6..7)));
assert_eq!(tree.get_leaf_node(3), Ok(LeafNode::Exists(3..4)));
assert_eq!(tree.get_leaf_node(4), Ok(LeafNode::Exists(4..5)));
assert_eq!(tree.get_leaf_node(5), Ok(LeafNode::Exists(5..6)));
assert_eq!(tree.get_leaf_node(6), Ok(LeafNode::Exists(6..7)));
assert_eq!(tree.get_leaf_node(7), Ok(LeafNode::DoesNotExist));
let tree = get_tree_a(3);
assert_eq!(tree.get_leaf_node(3), Ok(LeafNode::Exists(3..4)));
assert_eq!(tree.get_leaf_node(4), Ok(LeafNode::Exists(4..5)));
assert_eq!(tree.get_leaf_node(5), Ok(LeafNode::Exists(5..6)));
assert_eq!(tree.get_leaf_node(6), Ok(LeafNode::Padding));
assert_eq!(tree.get_leaf_node(7), Ok(LeafNode::DoesNotExist));
let tree = get_tree_a(0);
assert_eq!(tree.get_leaf_node(0), Ok(LeafNode::Padding));
assert_eq!(tree.get_leaf_node(1), Ok(LeafNode::DoesNotExist));
let tree = BTreeSchema::from_lengths(0, vec![3]).into_overlay(0);
assert_eq!(tree.get_leaf_node(0), Ok(LeafNode::Exists(0..3)));
assert_eq!(tree.get_leaf_node(1), Ok(LeafNode::DoesNotExist));
}
#[test]

View File

@ -1,4 +1,5 @@
use super::*;
use crate::btree_overlay::LeafNode;
use crate::merkleize::{merkleize, num_sanitized_leaves, sanitise_bytes};
impl<T> CachedTreeHash<Vec<T>> for Vec<T>
@ -104,7 +105,24 @@ pub fn update_tree_hash_cache<T: CachedTreeHash<T>>(
let mut buf = vec![0; HASHSIZE];
let item_bytes = HASHSIZE / T::tree_hash_packing_factor();
// Iterate through each of the leaf nodes.
// If the number of leaf nodes has changed, resize the cache.
if new_overlay.num_leaf_nodes() < old_overlay.num_leaf_nodes() {
let start = new_overlay.next_node();
let end = start + (old_overlay.num_leaf_nodes() - new_overlay.num_leaf_nodes());
cache.splice(start..end, vec![], vec![]);
} else if new_overlay.num_leaf_nodes() > old_overlay.num_leaf_nodes() {
let start = old_overlay.next_node();
let new_nodes = new_overlay.num_leaf_nodes() - old_overlay.num_leaf_nodes();
cache.splice(
start..start,
vec![0; new_nodes * HASHSIZE],
vec![true; new_nodes],
);
}
// Iterate through each of the leaf nodes in the new list.
for i in 0..new_overlay.num_leaf_nodes() {
// Iterate through the number of items that may be packing into the leaf node.
for j in 0..T::tree_hash_packing_factor() {
@ -129,85 +147,92 @@ pub fn update_tree_hash_cache<T: CachedTreeHash<T>>(
}
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
for i in 0..new_overlay.num_leaf_nodes() {
// Adjust `i` so it is a leaf node for each of the overlays.
let old_i = i + old_overlay.num_internal_nodes();
let new_i = i + new_overlay.num_internal_nodes();
for i in 0..std::cmp::max(new_overlay.num_leaf_nodes(), old_overlay.num_leaf_nodes()) {
match (
old_overlay.get_leaf_node(old_i)?,
new_overlay.get_leaf_node(new_i)?,
old_overlay.get_leaf_node(i + old_overlay.num_internal_nodes())?,
new_overlay.get_leaf_node(i + new_overlay.num_internal_nodes())?,
) {
// The item existed in the previous list and exists in the current list.
(Some(_old), Some(new)) => {
//
// Update the item.
(LeafNode::Exists(_old), LeafNode::Exists(new)) => {
cache.chunk_index = new.start;
vec[i].update_tree_hash_cache(cache)?;
}
// The item did not exist in the previous list but does exist in this list.
// The list has been lengthened and this is a new item that did not exist in
// the previous list.
//
// Viz., the list has been lengthened.
(None, Some(new)) => {
let (bytes, mut bools, schemas) =
TreeHashCache::new(&vec[i], new_overlay.depth + 1)?.into_components();
// Splice the tree for the new item into the current chunk_index.
(LeafNode::DoesNotExist, LeafNode::Exists(new)) => {
splice_in_new_tree(&vec[i], new.start..new.start, new_overlay.depth + 1, cache)?;
// Record the number of schemas, this will be used later in the fn.
let num_schemas = schemas.len();
// Flag the root node of the new tree as dirty.
bools[0] = true;
cache.splice(new.start..new.start + 1, bytes, bools);
cache
.schemas
.splice(cache.schema_index..cache.schema_index, schemas);
cache.schema_index += num_schemas;
cache.chunk_index = new.end;
}
// The item existed in the previous list but does not exist in this list.
// The list has been lengthened and this is a new item that was previously a
// padding item.
//
// Viz., the list has been shortened.
(Some(old), None) => {
if vec.len() == 0 {
// In this case, the list has been made empty and we should make
// this node padding.
cache.maybe_update_chunk(new_overlay.root(), &[0; HASHSIZE])?;
} else {
let old_internal_nodes = old_overlay.num_internal_nodes();
let new_internal_nodes = new_overlay.num_internal_nodes();
// Splice the tree for the new item over the padding chunk.
(LeafNode::Padding, LeafNode::Exists(new)) => {
splice_in_new_tree(&vec[i], new.start..new.start + 1, new_overlay.depth + 1, cache)?;
// If the number of internal nodes have shifted between the two
// overlays, the range for this node needs to be shifted to suit the
// new overlay.
let old = if old_internal_nodes > new_internal_nodes {
let offset = old_internal_nodes - new_internal_nodes;
old.start - offset..old.end - offset
} else if old_internal_nodes < new_internal_nodes {
let offset = new_internal_nodes - old_internal_nodes;
old.start + offset..old.end + offset
} else {
old.start..old.end
};
// If there are still some old bytes left-over from this item, replace
// them with a padding chunk.
if old.start < new_overlay.chunk_range().end {
let start_chunk = old.start;
let end_chunk =
std::cmp::min(old.end, new_overlay.chunk_range().end);
// In this case, there are some items in the new list and we should
// splice out the entire tree of the removed node, replacing it
// with a single padding node.
cache.splice(start_chunk..end_chunk, vec![0; HASHSIZE], vec![true]);
}
}
cache.chunk_index = new.end;
}
// The item didn't exist in the old list and doesn't exist in the new list,
// nothing to do.
(None, None) => {}
// The list has been shortened and this item was removed from the list and made
// into padding.
//
// Splice a padding node over the number of nodes the previous item occupied,
// starting at the current chunk_index.
(LeafNode::Exists(old), LeafNode::Padding) => {
let num_chunks = old.end - old.start;
cache.splice(
cache.chunk_index..cache.chunk_index + num_chunks,
vec![0; HASHSIZE],
vec![true],
);
cache.chunk_index += 1;
}
// The list has been shortened and the item for this leaf existed in the
// previous list, but does not exist in this list.
//
// Remove the number of nodes the previous item occupied, starting at the
// current chunk_index.
(LeafNode::Exists(old), LeafNode::DoesNotExist) => {
let num_chunks = old.end - old.start;
cache.splice(
cache.chunk_index..cache.chunk_index + num_chunks,
vec![],
vec![],
);
}
// The list has been shortened and this leaf was padding in the previous list,
// however it should not exist in this list.
//
// Remove one node, starting at the current `chunk_index`.
(LeafNode::Padding, LeafNode::DoesNotExist) => {
cache.splice(cache.chunk_index..cache.chunk_index + 1, vec![], vec![]);
}
// The list has been lengthened and this leaf did not exist in the previous
// list, but should be padding for this list.
//
// Splice in a new padding node at the current chunk_index.
(LeafNode::DoesNotExist, LeafNode::Padding) => {
cache.splice(
cache.chunk_index..cache.chunk_index,
vec![0; HASHSIZE],
vec![true],
);
cache.chunk_index += 1;
}
// This leaf was padding in both lists, there's nothing to do.
(LeafNode::Padding, LeafNode::Padding) => (),
// As we are looping through the larger of the lists of leaf nodes, it should
// be impossible for either leaf to be non-existent.
(LeafNode::DoesNotExist, LeafNode::DoesNotExist) => unreachable!(),
}
}
@ -224,6 +249,34 @@ pub fn update_tree_hash_cache<T: CachedTreeHash<T>>(
Ok(new_overlay)
}
/// Builds a fresh `TreeHashCache` for `item` and splices it over `chunks_to_replace` in
/// `cache`.
///
/// The root node of the spliced-in tree is flagged as modified so ancestor hashes will be
/// recomputed, and the new tree's schemas are inserted at the current `schema_index`
/// (which is then advanced past them).
///
/// Returns an error if building the new `TreeHashCache` fails.
fn splice_in_new_tree<T>(
    item: &T,
    chunks_to_replace: Range<usize>,
    depth: usize,
    cache: &mut TreeHashCache,
) -> Result<(), Error>
where
    T: CachedTreeHash<T>,
{
    let (bytes, mut bools, schemas) = TreeHashCache::new(item, depth)?.into_components();

    // Record the schema count before `schemas` is consumed by the splice below.
    let num_schemas = schemas.len();

    // Flag the root node of the new tree as dirty so its ancestors are re-hashed.
    bools[0] = true;

    cache.splice(chunks_to_replace, bytes, bools);
    cache
        .schemas
        .splice(cache.schema_index..cache.schema_index, schemas);
    cache.schema_index += num_schemas;

    Ok(())
}
fn get_packed_leaves<T>(vec: &Vec<T>) -> Result<Vec<u8>, Error>
where
T: CachedTreeHash<T>,

View File

@ -1,56 +1,24 @@
use super::*;
use std::cmp::min;
/// New vec is bigger than old vec.
pub fn grow_merkle_cache(
pub fn grow_merkle_tree(
old_bytes: &[u8],
old_flags: &[bool],
from_height: usize,
to_height: usize,
) -> Option<(Vec<u8>, Vec<bool>)> {
// Determine the size of our new tree. It is not just a simple `1 << to_height` as there can be
// an arbitrary number of nodes in `old_bytes` leaves if those leaves are subtrees.
let to_nodes = {
let old_nodes = old_bytes.len() / HASHSIZE;
let additional_nodes = old_nodes - nodes_in_tree_of_height(from_height);
nodes_in_tree_of_height(to_height) + additional_nodes
};
let to_nodes = nodes_in_tree_of_height(to_height);
let mut bytes = vec![0; to_nodes * HASHSIZE];
let mut flags = vec![true; to_nodes];
let leaf_level = from_height;
for i in 0..=from_height {
let old_byte_slice = old_bytes.get(byte_range_at_height(i))?;
let old_flag_slice = old_flags.get(node_range_at_height(i))?;
for i in 0..=from_height as usize {
// If we're on the leaf slice, grab the first byte and all the of the bytes after that.
// This is required because we can have an arbitrary number of bytes at the leaf level
// (e.g., the case where there are subtrees as leaves).
//
// If we're not on a leaf level, the number of nodes is fixed and known.
let (old_byte_slice, old_flag_slice) = if i == leaf_level {
(
old_bytes.get(first_byte_at_height(i)..)?,
old_flags.get(first_node_at_height(i)..)?,
)
} else {
(
old_bytes.get(byte_range_at_height(i))?,
old_flags.get(node_range_at_height(i))?,
)
};
let new_i = i + to_height - from_height;
let (new_byte_slice, new_flag_slice) = if i == leaf_level {
(
bytes.get_mut(first_byte_at_height(new_i)..)?,
flags.get_mut(first_node_at_height(new_i)..)?,
)
} else {
(
bytes.get_mut(byte_range_at_height(new_i))?,
flags.get_mut(node_range_at_height(new_i))?,
)
};
let offset = i + to_height - from_height;
let new_byte_slice = bytes.get_mut(byte_range_at_height(offset))?;
let new_flag_slice = flags.get_mut(node_range_at_height(offset))?;
new_byte_slice
.get_mut(0..old_byte_slice.len())?
@ -64,58 +32,33 @@ pub fn grow_merkle_cache(
}
/// New vec is smaller than old vec.
pub fn shrink_merkle_cache(
pub fn shrink_merkle_tree(
from_bytes: &[u8],
from_flags: &[bool],
from_height: usize,
to_height: usize,
to_nodes: usize,
) -> Option<(Vec<u8>, Vec<bool>)> {
let to_nodes = nodes_in_tree_of_height(to_height);
let mut bytes = vec![0; to_nodes * HASHSIZE];
let mut flags = vec![true; to_nodes];
for i in 0..=to_height as usize {
let from_i = i + from_height - to_height;
let offset = i + from_height - to_height;
let from_byte_slice = from_bytes.get(byte_range_at_height(offset))?;
let from_flag_slice = from_flags.get(node_range_at_height(offset))?;
let (from_byte_slice, from_flag_slice) = if from_i == from_height {
(
from_bytes.get(first_byte_at_height(from_i)..)?,
from_flags.get(first_node_at_height(from_i)..)?,
)
} else {
(
from_bytes.get(byte_range_at_height(from_i))?,
from_flags.get(node_range_at_height(from_i))?,
)
};
let to_byte_slice = bytes.get_mut(byte_range_at_height(i))?;
let to_flag_slice = flags.get_mut(node_range_at_height(i))?;
let (to_byte_slice, to_flag_slice) = if i == to_height {
(
bytes.get_mut(first_byte_at_height(i)..)?,
flags.get_mut(first_node_at_height(i)..)?,
)
} else {
(
bytes.get_mut(byte_range_at_height(i))?,
flags.get_mut(node_range_at_height(i))?,
)
};
let num_bytes = min(from_byte_slice.len(), to_byte_slice.len());
let num_flags = min(from_flag_slice.len(), to_flag_slice.len());
to_byte_slice
.get_mut(0..num_bytes)?
.copy_from_slice(from_byte_slice.get(0..num_bytes)?);
to_flag_slice
.get_mut(0..num_flags)?
.copy_from_slice(from_flag_slice.get(0..num_flags)?);
to_byte_slice.copy_from_slice(from_byte_slice.get(0..to_byte_slice.len())?);
to_flag_slice.copy_from_slice(from_flag_slice.get(0..to_flag_slice.len())?);
}
Some((bytes, flags))
}
fn nodes_in_tree_of_height(h: usize) -> usize {
/// Total number of nodes in a perfect binary tree of height `h`: `2^(h + 1) - 1`.
pub fn nodes_in_tree_of_height(h: usize) -> usize {
    (1 << (h + 1)) - 1
}
@ -128,10 +71,6 @@ fn node_range_at_height(h: usize) -> Range<usize> {
first_node_at_height(h)..last_node_at_height(h) + 1
}
fn first_byte_at_height(h: usize) -> usize {
first_node_at_height(h) * HASHSIZE
}
/// Index of the first node at height `h` in a flat binary-tree layout (root at index 0).
///
/// A perfect binary tree has `2^h - 1` nodes strictly above height `h`.
fn first_node_at_height(h: usize) -> usize {
    usize::pow(2, h as u32) - 1
}
@ -152,7 +91,7 @@ mod test {
let original_bytes = vec![42; small * HASHSIZE];
let original_flags = vec![false; small];
let (grown_bytes, grown_flags) = grow_merkle_cache(
let (grown_bytes, grown_flags) = grow_merkle_tree(
&original_bytes,
&original_flags,
(small + 1).trailing_zeros() as usize - 1,
@ -200,12 +139,11 @@ mod test {
assert_eq!(expected_bytes, grown_bytes);
assert_eq!(expected_flags, grown_flags);
let (shrunk_bytes, shrunk_flags) = shrink_merkle_cache(
let (shrunk_bytes, shrunk_flags) = shrink_merkle_tree(
&grown_bytes,
&grown_flags,
(big + 1).trailing_zeros() as usize - 1,
(small + 1).trailing_zeros() as usize - 1,
small,
)
.unwrap();
@ -221,7 +159,7 @@ mod test {
let original_bytes = vec![42; small * HASHSIZE];
let original_flags = vec![false; small];
let (grown_bytes, grown_flags) = grow_merkle_cache(
let (grown_bytes, grown_flags) = grow_merkle_tree(
&original_bytes,
&original_flags,
(small + 1).trailing_zeros() as usize - 1,
@ -269,12 +207,11 @@ mod test {
assert_eq!(expected_bytes, grown_bytes);
assert_eq!(expected_flags, grown_flags);
let (shrunk_bytes, shrunk_flags) = shrink_merkle_cache(
let (shrunk_bytes, shrunk_flags) = shrink_merkle_tree(
&grown_bytes,
&grown_flags,
(big + 1).trailing_zeros() as usize - 1,
(small + 1).trailing_zeros() as usize - 1,
small,
)
.unwrap();

View File

@ -134,7 +134,6 @@ impl TreeHashCache {
new_overlay: BTreeOverlay,
) -> Result<BTreeOverlay, Error> {
let old_overlay = self.get_overlay(schema_index, chunk_index)?;
// If the merkle tree required to represent the new list is of a different size to the one
// required for the previous list, then update our cache.
//
@ -143,31 +142,40 @@ impl TreeHashCache {
if new_overlay.num_leaf_nodes() != old_overlay.num_leaf_nodes() {
// Get slices of the existing tree from the cache.
let (old_bytes, old_flags) = self
.slices(old_overlay.chunk_range())
.slices(old_overlay.internal_chunk_range())
.ok_or_else(|| Error::UnableToObtainSlices)?;
let (new_bytes, new_bools) =
let (new_bytes, new_flags) = if new_overlay.num_internal_nodes() == 0 {
(vec![], vec![])
} else if old_overlay.num_internal_nodes() == 0 {
let nodes = resize::nodes_in_tree_of_height(new_overlay.height() - 1);
(vec![0; nodes * HASHSIZE], vec![true; nodes])
} else {
if new_overlay.num_leaf_nodes() > old_overlay.num_leaf_nodes() {
resize::grow_merkle_cache(
resize::grow_merkle_tree(
old_bytes,
old_flags,
old_overlay.height(),
new_overlay.height(),
old_overlay.height() - 1,
new_overlay.height() - 1,
)
.ok_or_else(|| Error::UnableToGrowMerkleTree)?
} else {
resize::shrink_merkle_cache(
resize::shrink_merkle_tree(
old_bytes,
old_flags,
old_overlay.height(),
new_overlay.height(),
new_overlay.num_chunks(),
old_overlay.height() - 1,
new_overlay.height() - 1,
)
.ok_or_else(|| Error::UnableToShrinkMerkleTree)?
};
}
};
// Splice the newly created `TreeHashCache` over the existing elements.
self.splice(old_overlay.chunk_range(), new_bytes, new_bools);
assert_eq!(old_overlay.num_internal_nodes(), old_flags.len());
assert_eq!(new_overlay.num_internal_nodes(), new_flags.len());
// Splice the resized created elements over the existing elements.
self.splice(old_overlay.internal_chunk_range(), new_bytes, new_flags);
}
let old_schema = std::mem::replace(&mut self.schemas[schema_index], new_overlay.into());

View File

@ -10,7 +10,7 @@ pub struct NestedStruct {
fn test_routine<T>(original: T, modified: Vec<T>)
where
T: CachedTreeHash<T>,
T: CachedTreeHash<T> + std::fmt::Debug,
{
let mut hasher = CachedTreeHasher::new(&original).unwrap();
@ -20,10 +20,23 @@ where
for (i, modified) in modified.iter().enumerate() {
println!("-- Start of modification {} --", i);
// Test after a modification
// Update the existing hasher.
hasher
.update(modified)
.expect(&format!("Modification {}", i));
// Create a new hasher from the "modified" struct.
let modified_hasher = CachedTreeHasher::new(modified).unwrap();
// Test that the modified hasher has the same number of chunks as a newly built hasher.
assert_eq!(
hasher.cache.chunk_modified.len(),
modified_hasher.cache.chunk_modified.len(),
"Number of chunks is different"
);
// Test the root generated by the updated hasher matches a non-cached tree hash root.
let standard_root = modified.tree_hash_root();
let cached_root = hasher
.tree_hash_root()
@ -73,7 +86,7 @@ fn test_inner() {
}
#[test]
fn test_vec() {
fn test_vec_of_u64() {
let original: Vec<u64> = vec![1, 2, 3, 4, 5];
let modified: Vec<Vec<u64>> = vec![
@ -113,6 +126,29 @@ fn test_nested_list_of_u64() {
test_routine(original, modified);
}
#[test]
fn test_shrinking_vec_of_vec() {
// Start with five single-element inner lists, then shrink the outer list to three.
let original: Vec<Vec<u64>> = vec![vec![1], vec![2], vec![3], vec![4], vec![5]];
let modified: Vec<Vec<u64>> = original[0..3].to_vec();
// Hasher built directly from the shrunken list: the expected end state.
let new_hasher = CachedTreeHasher::new(&modified).unwrap();
// Hasher built from the original list, then updated in place to the shrunken list.
let mut modified_hasher = CachedTreeHasher::new(&original).unwrap();
modified_hasher.update(&modified).unwrap();
// The updated cache must end up with the same shape as one built from scratch.
assert_eq!(
new_hasher.cache.schemas.len(),
modified_hasher.cache.schemas.len(),
"Schema count is different"
);
assert_eq!(
new_hasher.cache.chunk_modified.len(),
modified_hasher.cache.chunk_modified.len(),
"Chunk count is different"
);
}
#[derive(Clone, Debug, TreeHash, CachedTreeHash)]
pub struct StructWithVec {
pub a: u64,