From 0b5c10212d7f08d42f5d7bfe73a640fc2cb431ed Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 15 Apr 2019 11:14:30 +1000 Subject: [PATCH] Move tree_hash from ssz into own crate --- Cargo.toml | 1 + eth2/utils/ssz/src/cached_tree_hash.rs | 439 ------------------ eth2/utils/ssz/src/lib.rs | 1 - eth2/utils/tree_hash/Cargo.toml | 11 + eth2/utils/tree_hash/src/btree_overlay.rs | 0 eth2/utils/tree_hash/src/cached_tree_hash.rs | 193 ++++++++ .../src}/impls.rs | 2 +- eth2/utils/tree_hash/src/lib.rs | 249 ++++++++++ .../src}/resize.rs | 0 .../tests}/tests.rs | 8 +- 10 files changed, 461 insertions(+), 443 deletions(-) delete mode 100644 eth2/utils/ssz/src/cached_tree_hash.rs create mode 100644 eth2/utils/tree_hash/Cargo.toml create mode 100644 eth2/utils/tree_hash/src/btree_overlay.rs create mode 100644 eth2/utils/tree_hash/src/cached_tree_hash.rs rename eth2/utils/{ssz/src/cached_tree_hash => tree_hash/src}/impls.rs (99%) create mode 100644 eth2/utils/tree_hash/src/lib.rs rename eth2/utils/{ssz/src/cached_tree_hash => tree_hash/src}/resize.rs (100%) rename eth2/utils/{ssz/src/cached_tree_hash => tree_hash/tests}/tests.rs (99%) diff --git a/Cargo.toml b/Cargo.toml index 5c9593f5a..2574d328f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ members = [ "eth2/utils/ssz", "eth2/utils/ssz_derive", "eth2/utils/swap_or_not_shuffle", + "eth2/utils/tree_hash", "eth2/utils/fisher_yates_shuffle", "eth2/utils/test_random_derive", "beacon_node", diff --git a/eth2/utils/ssz/src/cached_tree_hash.rs b/eth2/utils/ssz/src/cached_tree_hash.rs deleted file mode 100644 index f7d18c57c..000000000 --- a/eth2/utils/ssz/src/cached_tree_hash.rs +++ /dev/null @@ -1,439 +0,0 @@ -use hashing::hash; -use int_to_bytes::int_to_bytes32; -use std::fmt::Debug; -use std::iter::Iterator; -use std::ops::Range; - -mod impls; -mod resize; -mod tests; - -const BYTES_PER_CHUNK: usize = 32; -const HASHSIZE: usize = 32; -const MERKLE_HASH_CHUNCK: usize = 2 * BYTES_PER_CHUNK; - -#[derive(Debug, PartialEq, Clone)] -pub enum Error { - ShouldNotProduceBTreeOverlay, - NoFirstNode, - NoBytesForRoot, - UnableToObtainSlices, - UnableToGrowMerkleTree, - UnableToShrinkMerkleTree, - BytesAreNotEvenChunks(usize), - NoModifiedFieldForChunk(usize), - NoBytesForChunk(usize), -} - -#[derive(Debug, PartialEq, Clone)] -pub enum ItemType { - Basic, - List, - Composite, -} - -// TODO: remove debug requirement. -pub trait CachedTreeHash: Debug { - fn item_type() -> ItemType; - - fn build_tree_hash_cache(&self) -> Result; - - /// Return the number of bytes when this element is encoded as raw SSZ _without_ length - /// prefixes. - fn num_bytes(&self) -> usize; - - fn offsets(&self) -> Result, Error>; - - fn num_child_nodes(&self) -> usize; - - fn packed_encoding(&self) -> Vec; - - fn packing_factor() -> usize; - - fn cached_hash_tree_root( - &self, - other: &Item, - cache: &mut TreeHashCache, - chunk: usize, - ) -> Result; -} - -#[derive(Debug, PartialEq, Clone)] -pub struct TreeHashCache { - cache: Vec, - chunk_modified: Vec, -} - -impl Into> for TreeHashCache { - fn into(self) -> Vec { - self.cache - } -} - -impl TreeHashCache { - pub fn new(item: &T) -> Result - where - T: CachedTreeHash, - { - item.build_tree_hash_cache() - } - - pub fn from_elems(cache: Vec, chunk_modified: Vec) -> Self { - Self { - cache, - chunk_modified, - } - } - - pub fn from_leaves_and_subtrees( - item: &T, - leaves_and_subtrees: Vec, - ) -> Result - where - T: CachedTreeHash, - { - let offset_handler = BTreeOverlay::new(item, 0)?; - - // Note how many leaves were provided. If is not a power-of-two, we'll need to pad it out - // later. - let num_provided_leaf_nodes = leaves_and_subtrees.len(); - - // Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill - // all the to-be-built internal nodes with zeros and append the leaves and subtrees. - let internal_node_bytes = offset_handler.num_internal_nodes * BYTES_PER_CHUNK; - let leaves_and_subtrees_bytes = leaves_and_subtrees - .iter() - .fold(0, |acc, t| acc + t.bytes_len()); - let mut cache = Vec::with_capacity(leaves_and_subtrees_bytes + internal_node_bytes); - cache.resize(internal_node_bytes, 0); - - // Allocate enough bytes to store all the leaves. - let mut leaves = Vec::with_capacity(offset_handler.num_leaf_nodes * HASHSIZE); - - // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then - // concatenating their merkle trees. - for t in leaves_and_subtrees { - leaves.append(&mut t.root()?); - cache.append(&mut t.into_merkle_tree()); - } - - // Pad the leaves to an even power-of-two, using zeros. - pad_for_leaf_count(num_provided_leaf_nodes, &mut cache); - - // Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros - // internal nodes created earlier with the internal nodes generated by `merkleize`. - let mut merkleized = merkleize(leaves); - merkleized.split_off(internal_node_bytes); - cache.splice(0..internal_node_bytes, merkleized); - - Ok(Self { - chunk_modified: vec![false; cache.len() / BYTES_PER_CHUNK], - cache, - }) - } - - pub fn from_bytes(bytes: Vec, initial_modified_state: bool) -> Result { - if bytes.len() % BYTES_PER_CHUNK > 0 { - return Err(Error::BytesAreNotEvenChunks(bytes.len())); - } - - Ok(Self { - chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK], - cache: bytes, - }) - } - - pub fn bytes_len(&self) -> usize { - self.cache.len() - } - - pub fn root(&self) -> Result, Error> { - self.cache - .get(0..HASHSIZE) - .ok_or_else(|| Error::NoBytesForRoot) - .and_then(|slice| Ok(slice.to_vec())) - } - - pub fn splice(&mut self, chunk_range: Range, replace_with: Self) { - let (bytes, bools) = replace_with.into_components(); - - // Update the `chunk_modified` vec, marking all spliced-in nodes as changed. - self.chunk_modified.splice(chunk_range.clone(), bools); - self.cache - .splice(node_range_to_byte_range(&chunk_range), bytes); - } - - pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { - let start = chunk * BYTES_PER_CHUNK; - let end = start + BYTES_PER_CHUNK; - - if !self.chunk_equals(chunk, to)? { - self.cache - .get_mut(start..end) - .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))? - .copy_from_slice(to); - self.chunk_modified[chunk] = true; - } - - Ok(()) - } - - pub fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { - Some(( - self.cache.get(node_range_to_byte_range(&chunk_range))?, - self.chunk_modified.get(chunk_range)?, - )) - } - - pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { - let start = chunk * BYTES_PER_CHUNK; - let end = start + BYTES_PER_CHUNK; - - self.cache - .get_mut(start..end) - .ok_or_else(|| Error::NoBytesForChunk(chunk))? - .copy_from_slice(to); - - self.chunk_modified[chunk] = true; - - Ok(()) - } - - pub fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { - let start = chunk * BYTES_PER_CHUNK; - let end = start + BYTES_PER_CHUNK; - - Ok(self - .cache - .get(start..end) - .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?) - } - - pub fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { - Ok(self.get_chunk(chunk)? == other) - } - - pub fn changed(&self, chunk: usize) -> Result { - self.chunk_modified - .get(chunk) - .cloned() - .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk)) - } - - pub fn either_modified(&self, children: (&usize, &usize)) -> Result { - Ok(self.changed(*children.0)? | self.changed(*children.1)?) - } - - pub fn hash_children(&self, children: (&usize, &usize)) -> Result, Error> { - let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2); - child_bytes.append(&mut self.get_chunk(*children.0)?.to_vec()); - child_bytes.append(&mut self.get_chunk(*children.1)?.to_vec()); - - Ok(hash(&child_bytes)) - } - - pub fn mix_in_length(&self, chunk: usize, length: usize) -> Result, Error> { - let mut bytes = Vec::with_capacity(2 * BYTES_PER_CHUNK); - - bytes.append(&mut self.get_chunk(chunk)?.to_vec()); - bytes.append(&mut int_to_bytes32(length as u64)); - - Ok(hash(&bytes)) - } - - pub fn into_merkle_tree(self) -> Vec { - self.cache - } - - pub fn into_components(self) -> (Vec, Vec) { - (self.cache, self.chunk_modified) - } -} - -fn children(parent: usize) -> (usize, usize) { - ((2 * parent + 1), (2 * parent + 2)) -} - -fn num_nodes(num_leaves: usize) -> usize { - 2 * num_leaves - 1 -} - -fn node_range_to_byte_range(node_range: &Range) -> Range { - node_range.start * HASHSIZE..node_range.end * HASHSIZE -} - -#[derive(Debug)] -pub struct BTreeOverlay { - num_internal_nodes: usize, - pub num_leaf_nodes: usize, - first_node: usize, - next_node: usize, - offsets: Vec, -} - -impl BTreeOverlay { - pub fn new(item: &T, initial_offset: usize) -> Result - where - T: CachedTreeHash, - { - Self::from_lengths(initial_offset, item.offsets()?) - } - - fn from_lengths(offset: usize, mut lengths: Vec) -> Result { - // Extend it to the next power-of-two, if it is not already. - let num_leaf_nodes = if lengths.len().is_power_of_two() { - lengths.len() - } else { - let num_leaf_nodes = lengths.len().next_power_of_two(); - lengths.resize(num_leaf_nodes, 1); - num_leaf_nodes - }; - - let num_nodes = num_nodes(num_leaf_nodes); - let num_internal_nodes = num_nodes - num_leaf_nodes; - - let mut offsets = Vec::with_capacity(num_nodes); - offsets.append(&mut (offset..offset + num_internal_nodes).collect()); - - let mut next_node = num_internal_nodes + offset; - for i in 0..num_leaf_nodes { - offsets.push(next_node); - next_node += lengths[i]; - } - - Ok(Self { - num_internal_nodes, - num_leaf_nodes, - offsets, - first_node: offset, - next_node, - }) - } - - pub fn root(&self) -> usize { - self.first_node - } - - pub fn height(&self) -> usize { - self.num_leaf_nodes.trailing_zeros() as usize - } - - pub fn chunk_range(&self) -> Range { - self.first_node..self.next_node - } - - pub fn total_chunks(&self) -> usize { - self.next_node - self.first_node - } - - pub fn total_nodes(&self) -> usize { - self.num_internal_nodes + self.num_leaf_nodes - } - - pub fn first_leaf_node(&self) -> Result { - self.offsets - .get(self.num_internal_nodes) - .cloned() - .ok_or_else(|| Error::NoFirstNode) - } - - pub fn next_node(&self) -> usize { - self.next_node - } - - /// Returns an iterator visiting each internal node, providing the left and right child chunks - /// for the node. - pub fn iter_internal_nodes<'a>( - &'a self, - ) -> impl DoubleEndedIterator { - let internal_nodes = &self.offsets[0..self.num_internal_nodes]; - - internal_nodes.iter().enumerate().map(move |(i, parent)| { - let children = children(i); - ( - parent, - (&self.offsets[children.0], &self.offsets[children.1]), - ) - }) - } - - /// Returns an iterator visiting each leaf node, providing the chunk for that node. - pub fn iter_leaf_nodes<'a>(&'a self) -> impl DoubleEndedIterator { - let leaf_nodes = &self.offsets[self.num_internal_nodes..]; - - leaf_nodes.iter() - } -} - -/// Split `values` into a power-of-two, identical-length chunks (padding with `0`) and merkleize -/// them, returning the entire merkle tree. -/// -/// The root hash is `merkleize(values)[0..BYTES_PER_CHUNK]`. -pub fn merkleize(values: Vec) -> Vec { - let values = sanitise_bytes(values); - - let leaves = values.len() / HASHSIZE; - - if leaves == 0 { - panic!("No full leaves"); - } - - if !leaves.is_power_of_two() { - panic!("leaves is not power of two"); - } - - let mut o: Vec = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE]; - o.append(&mut values.to_vec()); - - let mut i = o.len(); - let mut j = o.len() - values.len(); - - while i >= MERKLE_HASH_CHUNCK { - i -= MERKLE_HASH_CHUNCK; - let hash = hash(&o[i..i + MERKLE_HASH_CHUNCK]); - - j -= HASHSIZE; - o[j..j + HASHSIZE].copy_from_slice(&hash); - } - - o -} - -pub fn sanitise_bytes(mut bytes: Vec) -> Vec { - let present_leaves = num_unsanitized_leaves(bytes.len()); - let required_leaves = present_leaves.next_power_of_two(); - - if (present_leaves != required_leaves) | last_leaf_needs_padding(bytes.len()) { - bytes.resize(num_bytes(required_leaves), 0); - } - - bytes -} - -fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { - let required_leaves = num_leaves.next_power_of_two(); - - bytes.resize( - bytes.len() + (required_leaves - num_leaves) * BYTES_PER_CHUNK, - 0, - ); -} - -fn last_leaf_needs_padding(num_bytes: usize) -> bool { - num_bytes % HASHSIZE != 0 -} - -/// Rounds up -fn num_unsanitized_leaves(num_bytes: usize) -> usize { - (num_bytes + HASHSIZE - 1) / HASHSIZE -} - -/// Rounds up -fn num_sanitized_leaves(num_bytes: usize) -> usize { - let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE; - leaves.next_power_of_two() -} - -fn num_bytes(num_leaves: usize) -> usize { - num_leaves * HASHSIZE -} diff --git a/eth2/utils/ssz/src/lib.rs b/eth2/utils/ssz/src/lib.rs index f86749c66..cb3f63c48 100644 --- a/eth2/utils/ssz/src/lib.rs +++ b/eth2/utils/ssz/src/lib.rs @@ -10,7 +10,6 @@ extern crate bytes; extern crate ethereum_types; -mod cached_tree_hash; pub mod decode; pub mod encode; mod signed_root; diff --git a/eth2/utils/tree_hash/Cargo.toml b/eth2/utils/tree_hash/Cargo.toml new file mode 100644 index 000000000..243a49446 --- /dev/null +++ b/eth2/utils/tree_hash/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "tree_hash" +version = "0.1.0" +authors = ["Paul Hauner "] +edition = "2018" + +[dependencies] +ethereum-types = "0.5" +hashing = { path = "../hashing" } +int_to_bytes = { path = "../int_to_bytes" } +ssz = { path = "../ssz" } diff --git a/eth2/utils/tree_hash/src/btree_overlay.rs b/eth2/utils/tree_hash/src/btree_overlay.rs new file mode 100644 index 000000000..e69de29bb diff --git a/eth2/utils/tree_hash/src/cached_tree_hash.rs b/eth2/utils/tree_hash/src/cached_tree_hash.rs new file mode 100644 index 000000000..556ba2d21 --- /dev/null +++ b/eth2/utils/tree_hash/src/cached_tree_hash.rs @@ -0,0 +1,193 @@ +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub struct TreeHashCache { + cache: Vec, + chunk_modified: Vec, +} + +impl Into> for TreeHashCache { + fn into(self) -> Vec { + self.cache + } +} + +impl TreeHashCache { + pub fn new(item: &T) -> Result + where + T: CachedTreeHash, + { + item.build_tree_hash_cache() + } + + pub fn from_elems(cache: Vec, chunk_modified: Vec) -> Self { + Self { + cache, + chunk_modified, + } + } + + pub fn from_leaves_and_subtrees( + item: &T, + leaves_and_subtrees: Vec, + ) -> Result + where + T: CachedTreeHash, + { + let offset_handler = BTreeOverlay::new(item, 0)?; + + // Note how many leaves were provided. If is not a power-of-two, we'll need to pad it out + // later. + let num_provided_leaf_nodes = leaves_and_subtrees.len(); + + // Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill + // all the to-be-built internal nodes with zeros and append the leaves and subtrees. + let internal_node_bytes = offset_handler.num_internal_nodes * BYTES_PER_CHUNK; + let leaves_and_subtrees_bytes = leaves_and_subtrees + .iter() + .fold(0, |acc, t| acc + t.bytes_len()); + let mut cache = Vec::with_capacity(leaves_and_subtrees_bytes + internal_node_bytes); + cache.resize(internal_node_bytes, 0); + + // Allocate enough bytes to store all the leaves. + let mut leaves = Vec::with_capacity(offset_handler.num_leaf_nodes * HASHSIZE); + + // Iterate through all of the leaves/subtrees, adding their root as a leaf node and then + // concatenating their merkle trees. + for t in leaves_and_subtrees { + leaves.append(&mut t.root()?); + cache.append(&mut t.into_merkle_tree()); + } + + // Pad the leaves to an even power-of-two, using zeros. + pad_for_leaf_count(num_provided_leaf_nodes, &mut cache); + + // Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros + // internal nodes created earlier with the internal nodes generated by `merkleize`. + let mut merkleized = merkleize(leaves); + merkleized.split_off(internal_node_bytes); + cache.splice(0..internal_node_bytes, merkleized); + + Ok(Self { + chunk_modified: vec![false; cache.len() / BYTES_PER_CHUNK], + cache, + }) + } + + pub fn from_bytes(bytes: Vec, initial_modified_state: bool) -> Result { + if bytes.len() % BYTES_PER_CHUNK > 0 { + return Err(Error::BytesAreNotEvenChunks(bytes.len())); + } + + Ok(Self { + chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK], + cache: bytes, + }) + } + + pub fn bytes_len(&self) -> usize { + self.cache.len() + } + + pub fn root(&self) -> Result, Error> { + self.cache + .get(0..HASHSIZE) + .ok_or_else(|| Error::NoBytesForRoot) + .and_then(|slice| Ok(slice.to_vec())) + } + + pub fn splice(&mut self, chunk_range: Range, replace_with: Self) { + let (bytes, bools) = replace_with.into_components(); + + // Update the `chunk_modified` vec, marking all spliced-in nodes as changed. + self.chunk_modified.splice(chunk_range.clone(), bools); + self.cache + .splice(node_range_to_byte_range(&chunk_range), bytes); + } + + pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { + let start = chunk * BYTES_PER_CHUNK; + let end = start + BYTES_PER_CHUNK; + + if !self.chunk_equals(chunk, to)? { + self.cache + .get_mut(start..end) + .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))? + .copy_from_slice(to); + self.chunk_modified[chunk] = true; + } + + Ok(()) + } + + pub fn slices(&self, chunk_range: Range) -> Option<(&[u8], &[bool])> { + Some(( + self.cache.get(node_range_to_byte_range(&chunk_range))?, + self.chunk_modified.get(chunk_range)?, + )) + } + + pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> { + let start = chunk * BYTES_PER_CHUNK; + let end = start + BYTES_PER_CHUNK; + + self.cache + .get_mut(start..end) + .ok_or_else(|| Error::NoBytesForChunk(chunk))? + .copy_from_slice(to); + + self.chunk_modified[chunk] = true; + + Ok(()) + } + + pub fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> { + let start = chunk * BYTES_PER_CHUNK; + let end = start + BYTES_PER_CHUNK; + + Ok(self + .cache + .get(start..end) + .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?) + } + + pub fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result { + Ok(self.get_chunk(chunk)? == other) + } + + pub fn changed(&self, chunk: usize) -> Result { + self.chunk_modified + .get(chunk) + .cloned() + .ok_or_else(|| Error::NoModifiedFieldForChunk(chunk)) + } + + pub fn either_modified(&self, children: (&usize, &usize)) -> Result { + Ok(self.changed(*children.0)? | self.changed(*children.1)?) + } + + pub fn hash_children(&self, children: (&usize, &usize)) -> Result, Error> { + let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2); + child_bytes.append(&mut self.get_chunk(*children.0)?.to_vec()); + child_bytes.append(&mut self.get_chunk(*children.1)?.to_vec()); + + Ok(hash(&child_bytes)) + } + + pub fn mix_in_length(&self, chunk: usize, length: usize) -> Result, Error> { + let mut bytes = Vec::with_capacity(2 * BYTES_PER_CHUNK); + + bytes.append(&mut self.get_chunk(chunk)?.to_vec()); + bytes.append(&mut int_to_bytes32(length as u64)); + + Ok(hash(&bytes)) + } + + pub fn into_merkle_tree(self) -> Vec { + self.cache + } + + pub fn into_components(self) -> (Vec, Vec) { + (self.cache, self.chunk_modified) + } +} diff --git a/eth2/utils/ssz/src/cached_tree_hash/impls.rs b/eth2/utils/tree_hash/src/impls.rs similarity index 99% rename from eth2/utils/ssz/src/cached_tree_hash/impls.rs rename to eth2/utils/tree_hash/src/impls.rs index 26905c667..d5297c38e 100644 --- a/eth2/utils/ssz/src/cached_tree_hash/impls.rs +++ b/eth2/utils/tree_hash/src/impls.rs @@ -1,6 +1,6 @@ use super::resize::{grow_merkle_cache, shrink_merkle_cache}; use super::*; -use crate::ssz_encode; +use ssz::ssz_encode; impl CachedTreeHash for u64 { fn item_type() -> ItemType { diff --git a/eth2/utils/tree_hash/src/lib.rs b/eth2/utils/tree_hash/src/lib.rs new file mode 100644 index 000000000..1b085770d --- /dev/null +++ b/eth2/utils/tree_hash/src/lib.rs @@ -0,0 +1,249 @@ +use hashing::hash; +use int_to_bytes::int_to_bytes32; +use std::fmt::Debug; +use std::iter::Iterator; +use std::ops::Range; + +mod cached_tree_hash; +mod impls; +mod resize; + +pub use cached_tree_hash::TreeHashCache; + +pub const BYTES_PER_CHUNK: usize = 32; +pub const HASHSIZE: usize = 32; +pub const MERKLE_HASH_CHUNCK: usize = 2 * BYTES_PER_CHUNK; + +#[derive(Debug, PartialEq, Clone)] +pub enum Error { + ShouldNotProduceBTreeOverlay, + NoFirstNode, + NoBytesForRoot, + UnableToObtainSlices, + UnableToGrowMerkleTree, + UnableToShrinkMerkleTree, + BytesAreNotEvenChunks(usize), + NoModifiedFieldForChunk(usize), + NoBytesForChunk(usize), +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ItemType { + Basic, + List, + Composite, +} + +// TODO: remove debug requirement. +pub trait CachedTreeHash: Debug { + fn item_type() -> ItemType; + + fn build_tree_hash_cache(&self) -> Result; + + /// Return the number of bytes when this element is encoded as raw SSZ _without_ length + /// prefixes. + fn num_bytes(&self) -> usize; + + fn offsets(&self) -> Result, Error>; + + fn num_child_nodes(&self) -> usize; + + fn packed_encoding(&self) -> Vec; + + fn packing_factor() -> usize; + + fn cached_hash_tree_root( + &self, + other: &Item, + cache: &mut TreeHashCache, + chunk: usize, + ) -> Result; +} + +fn children(parent: usize) -> (usize, usize) { + ((2 * parent + 1), (2 * parent + 2)) +} + +fn num_nodes(num_leaves: usize) -> usize { + 2 * num_leaves - 1 +} + +fn node_range_to_byte_range(node_range: &Range) -> Range { + node_range.start * HASHSIZE..node_range.end * HASHSIZE +} + +#[derive(Debug)] +pub struct BTreeOverlay { + num_internal_nodes: usize, + pub num_leaf_nodes: usize, + first_node: usize, + next_node: usize, + offsets: Vec, +} + +impl BTreeOverlay { + pub fn new(item: &T, initial_offset: usize) -> Result + where + T: CachedTreeHash, + { + Self::from_lengths(initial_offset, item.offsets()?) + } + + fn from_lengths(offset: usize, mut lengths: Vec) -> Result { + // Extend it to the next power-of-two, if it is not already. + let num_leaf_nodes = if lengths.len().is_power_of_two() { + lengths.len() + } else { + let num_leaf_nodes = lengths.len().next_power_of_two(); + lengths.resize(num_leaf_nodes, 1); + num_leaf_nodes + }; + + let num_nodes = num_nodes(num_leaf_nodes); + let num_internal_nodes = num_nodes - num_leaf_nodes; + + let mut offsets = Vec::with_capacity(num_nodes); + offsets.append(&mut (offset..offset + num_internal_nodes).collect()); + + let mut next_node = num_internal_nodes + offset; + for i in 0..num_leaf_nodes { + offsets.push(next_node); + next_node += lengths[i]; + } + + Ok(Self { + num_internal_nodes, + num_leaf_nodes, + offsets, + first_node: offset, + next_node, + }) + } + + pub fn root(&self) -> usize { + self.first_node + } + + pub fn height(&self) -> usize { + self.num_leaf_nodes.trailing_zeros() as usize + } + + pub fn chunk_range(&self) -> Range { + self.first_node..self.next_node + } + + pub fn total_chunks(&self) -> usize { + self.next_node - self.first_node + } + + pub fn total_nodes(&self) -> usize { + self.num_internal_nodes + self.num_leaf_nodes + } + + pub fn first_leaf_node(&self) -> Result { + self.offsets + .get(self.num_internal_nodes) + .cloned() + .ok_or_else(|| Error::NoFirstNode) + } + + pub fn next_node(&self) -> usize { + self.next_node + } + + /// Returns an iterator visiting each internal node, providing the left and right child chunks + /// for the node. + pub fn iter_internal_nodes<'a>( + &'a self, + ) -> impl DoubleEndedIterator { + let internal_nodes = &self.offsets[0..self.num_internal_nodes]; + + internal_nodes.iter().enumerate().map(move |(i, parent)| { + let children = children(i); + ( + parent, + (&self.offsets[children.0], &self.offsets[children.1]), + ) + }) + } + + /// Returns an iterator visiting each leaf node, providing the chunk for that node. + pub fn iter_leaf_nodes<'a>(&'a self) -> impl DoubleEndedIterator { + let leaf_nodes = &self.offsets[self.num_internal_nodes..]; + + leaf_nodes.iter() + } +} + +/// Split `values` into a power-of-two, identical-length chunks (padding with `0`) and merkleize +/// them, returning the entire merkle tree. +/// +/// The root hash is `merkleize(values)[0..BYTES_PER_CHUNK]`. +pub fn merkleize(values: Vec) -> Vec { + let values = sanitise_bytes(values); + + let leaves = values.len() / HASHSIZE; + + if leaves == 0 { + panic!("No full leaves"); + } + + if !leaves.is_power_of_two() { + panic!("leaves is not power of two"); + } + + let mut o: Vec = vec![0; (num_nodes(leaves) - leaves) * HASHSIZE]; + o.append(&mut values.to_vec()); + + let mut i = o.len(); + let mut j = o.len() - values.len(); + + while i >= MERKLE_HASH_CHUNCK { + i -= MERKLE_HASH_CHUNCK; + let hash = hash(&o[i..i + MERKLE_HASH_CHUNCK]); + + j -= HASHSIZE; + o[j..j + HASHSIZE].copy_from_slice(&hash); + } + + o +} + +pub fn sanitise_bytes(mut bytes: Vec) -> Vec { + let present_leaves = num_unsanitized_leaves(bytes.len()); + let required_leaves = present_leaves.next_power_of_two(); + + if (present_leaves != required_leaves) | last_leaf_needs_padding(bytes.len()) { + bytes.resize(num_bytes(required_leaves), 0); + } + + bytes +} + +fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec) { + let required_leaves = num_leaves.next_power_of_two(); + + bytes.resize( + bytes.len() + (required_leaves - num_leaves) * BYTES_PER_CHUNK, + 0, + ); +} + +fn last_leaf_needs_padding(num_bytes: usize) -> bool { + num_bytes % HASHSIZE != 0 +} + +/// Rounds up +fn num_unsanitized_leaves(num_bytes: usize) -> usize { + (num_bytes + HASHSIZE - 1) / HASHSIZE +} + +/// Rounds up +fn num_sanitized_leaves(num_bytes: usize) -> usize { + let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE; + leaves.next_power_of_two() +} + +fn num_bytes(num_leaves: usize) -> usize { + num_leaves * HASHSIZE +} diff --git a/eth2/utils/ssz/src/cached_tree_hash/resize.rs b/eth2/utils/tree_hash/src/resize.rs similarity index 100% rename from eth2/utils/ssz/src/cached_tree_hash/resize.rs rename to eth2/utils/tree_hash/src/resize.rs diff --git a/eth2/utils/ssz/src/cached_tree_hash/tests.rs b/eth2/utils/tree_hash/tests/tests.rs similarity index 99% rename from eth2/utils/ssz/src/cached_tree_hash/tests.rs rename to eth2/utils/tree_hash/tests/tests.rs index e6e2b1754..972eb1e00 100644 --- a/eth2/utils/ssz/src/cached_tree_hash/tests.rs +++ b/eth2/utils/tree_hash/tests/tests.rs @@ -1,6 +1,10 @@ -#![cfg(test)] -use super::*; +use hashing::hash; use int_to_bytes::{int_to_bytes32, int_to_bytes8}; +use tree_hash::*; + +fn num_nodes(num_leaves: usize) -> usize { + 2 * num_leaves - 1 +} #[derive(Clone, Debug)] pub struct Inner {