Tidy and add docs for cached tree hash

Paul Hauner 2019-04-29 17:46:01 +10:00
parent f622aa0b65
commit 84d72cfed6
15 changed files with 292 additions and 159 deletions

View File

@ -806,7 +806,7 @@ impl BeaconState {
/// canonical root of `self`.
pub fn update_tree_hash_cache(&mut self) -> Result<Hash256, Error> {
if self.tree_hash_cache.is_empty() {
self.tree_hash_cache = TreeHashCache::new(self, 0)?;
self.tree_hash_cache = TreeHashCache::new(self)?;
} else {
// Move the cache outside of `self` to satisfy the borrow checker.
let mut cache = std::mem::replace(&mut self.tree_hash_cache, TreeHashCache::default());
@ -828,7 +828,7 @@ impl BeaconState {
/// cache update.
pub fn cached_tree_hash_root(&self) -> Result<Hash256, Error> {
self.tree_hash_cache
.root()
.tree_hash_root()
.and_then(|b| Ok(Hash256::from_slice(b)))
.map_err(|e| e.into())
}
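The "move the cache outside of `self`" comment above refers to the standard `std::mem::replace` borrow-checker pattern. A minimal sketch of that pattern, using illustrative names rather than the real `BeaconState`/`TreeHashCache` types:

#[derive(Default)]
struct Cache {
    bytes: Vec<u8>,
}

struct State {
    data: u64,
    cache: Cache,
}

impl State {
    fn update_cache(&mut self) {
        // Move the cache out of `self`, leaving a cheap default behind, so that
        // `&self` and `&mut cache` can be used at the same time.
        let mut cache = std::mem::replace(&mut self.cache, Cache::default());
        cache.bytes = self.data.to_le_bytes().to_vec();
        // Move the updated cache back into `self`.
        self.cache = cache;
    }
}

fn main() {
    let mut state = State { data: 42, cache: Cache::default() };
    state.update_cache();
    assert_eq!(state.cache.bytes.len(), 8);
}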

View File

@ -46,7 +46,7 @@ macro_rules! cached_tree_hash_tests {
// Test the original hash
let original = $type::random_for_test(&mut rng);
let mut cache = cached_tree_hash::TreeHashCache::new(&original, 0).unwrap();
let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap();
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
@ -64,7 +64,7 @@ macro_rules! cached_tree_hash_tests {
);
// Produce a new cache for the modified object and compare it to the updated cache.
let mut modified_cache = cached_tree_hash::TreeHashCache::new(&modified, 0).unwrap();
let mut modified_cache = cached_tree_hash::TreeHashCache::new(&modified).unwrap();
// Reset the caches.
cache.reset_modifications();

View File

@ -63,20 +63,20 @@ where
&self,
depth: usize,
) -> Result<cached_tree_hash::TreeHashCache, cached_tree_hash::Error> {
let (cache, _overlay) = cached_tree_hash::impls::vec::new_tree_hash_cache(self, depth)?;
let (cache, _overlay) = cached_tree_hash::vec::new_tree_hash_cache(self, depth)?;
Ok(cache)
}
fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema {
cached_tree_hash::impls::vec::produce_schema(self, depth)
cached_tree_hash::vec::produce_schema(self, depth)
}
fn update_tree_hash_cache(
&self,
cache: &mut cached_tree_hash::TreeHashCache,
) -> Result<(), cached_tree_hash::Error> {
cached_tree_hash::impls::vec::update_tree_hash_cache(self, cache)?;
cached_tree_hash::vec::update_tree_hash_cache(self, cache)?;
Ok(())
}
@ -122,15 +122,21 @@ mod test {
pub fn test_cached_tree_hash() {
let original = TreeHashVector::from(vec![1_u64, 2, 3, 4]);
let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap();
let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
original.tree_hash_root()
);
let modified = TreeHashVector::from(vec![1_u64, 1, 1, 1]);
hasher.update(&modified).unwrap();
cache.update(&modified).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
modified.tree_hash_root()
);
}
}

View File

@ -149,15 +149,21 @@ mod tests {
let sk = SecretKey::random();
let original = PublicKey::from_secret_key(&sk);
let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap();
let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
original.tree_hash_root()
);
let sk = SecretKey::random();
let modified = PublicKey::from_secret_key(&sk);
hasher.update(&modified).unwrap();
cache.update(&modified).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
modified.tree_hash_root()
);
}
}

View File

@ -166,15 +166,21 @@ mod tests {
let keypair = Keypair::random();
let original = Signature::new(&[42, 42], 0, &keypair.sk);
let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap();
let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
original.tree_hash_root()
);
let modified = Signature::new(&[99, 99], 0, &keypair.sk);
hasher.update(&modified).unwrap();
cache.update(&modified).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
modified.tree_hash_root()
);
}
#[test]

View File

@ -284,15 +284,21 @@ mod tests {
pub fn test_cached_tree_hash() {
let original = BooleanBitfield::from_bytes(&vec![18; 12][..]);
let mut hasher = cached_tree_hash::CachedTreeHasher::new(&original).unwrap();
let mut cache = cached_tree_hash::TreeHashCache::new(&original).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), original.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
original.tree_hash_root()
);
let modified = BooleanBitfield::from_bytes(&vec![2; 1][..]);
hasher.update(&modified).unwrap();
cache.update(&modified).unwrap();
assert_eq!(hasher.tree_hash_root().unwrap(), modified.tree_hash_root());
assert_eq!(
cache.tree_hash_root().unwrap().to_vec(),
modified.tree_hash_root()
);
}
#[test]

View File

@ -2,7 +2,7 @@ use cached_tree_hash::TreeHashCache;
use ethereum_types::H256 as Hash256;
fn run(vec: &Vec<Hash256>, modified_vec: &Vec<Hash256>) {
let mut cache = TreeHashCache::new(vec, 0).unwrap();
let mut cache = TreeHashCache::new(vec).unwrap();
cache.update(modified_vec).unwrap();
}

View File

@ -1,7 +1,19 @@
use super::*;
/// A schema defining a binary tree over a `TreeHashCache`.
///
/// This structure is used for succinct storage; run-time functionality is gained by converting the
/// schema into a `BTreeOverlay`.
#[derive(Debug, PartialEq, Clone)]
pub struct BTreeSchema {
/// The depth of a schema defines how far it is nested within other fixed-length items.
///
/// Each time a new variable-length object is created, all items within it are assigned a depth
/// of `depth + 1`.
///
/// When storing the schemas in a list, the depth parameter allows for removing all schemas
/// belonging to a specific variable-length item without removing schemas related to adjacent
/// variable-length items.
pub depth: usize,
lengths: Vec<usize>,
}
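To illustrate the `depth` rule described above, consider a hypothetical item (the names below are illustrative only, not part of this diff):

struct Foo {
    a: u64,       // covered by Foo's own schema, at depth 0
    b: Vec<u64>,  // variable-length: the schemas describing `b`'s tree are assigned depth 0 + 1 = 1
}

// If `b` later grows or shrinks, every schema belonging to it (depth > 0) can be removed and
// rebuilt without disturbing the schemas of adjacent fixed-length items.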
@ -25,21 +37,35 @@ impl Into<BTreeSchema> for BTreeOverlay {
}
}
/// Provides a status for some leaf-node in a binary tree.
#[derive(Debug, PartialEq, Clone)]
pub enum LeafNode {
/// The leaf node does not exist in this tree.
DoesNotExist,
/// The leaf node exists in the tree and has a real value within the given `chunk` range.
Exists(Range<usize>),
/// The leaf node exists in the tree only as padding.
Padding,
}
/// Instantiated from a `BTreeSchema`, allows for interpreting some chunks of a `TreeHashCache` as
/// a perfect binary tree.
///
/// The primary purpose of this struct is to map from binary tree "nodes" to `TreeHashCache`
/// "chunks". Each tree has nodes `0..n` where `n` is the number of nodes and `0` is the root node.
/// Each of these nodes is mapped to a chunk, starting from `self.offset` and increasing in steps
/// of `1` for internal nodes and arbitrary steps for leaf-nodes.
#[derive(Debug, PartialEq, Clone)]
pub struct BTreeOverlay {
offset: usize,
/// See `BTreeSchema.depth` for a description.
pub depth: usize,
lengths: Vec<usize>,
}
impl BTreeOverlay {
/// Instantiates a new instance for `item`, where its first chunk is `initial_offset` and it has
/// the specified `depth`.
pub fn new<T>(item: &T, initial_offset: usize, depth: usize) -> Self
where
T: CachedTreeHash,
@ -47,6 +73,7 @@ impl BTreeOverlay {
Self::from_schema(item.tree_hash_cache_schema(depth), initial_offset)
}
/// Instantiates a new instance from a schema, where its first chunk is `offset`.
pub fn from_schema(schema: BTreeSchema, offset: usize) -> Self {
Self {
offset,
@ -55,6 +82,10 @@ impl BTreeOverlay {
}
}
/// Returns a `LeafNode` for each of the `n` leaves of the tree.
///
/// `LeafNode::DoesNotExist` is returned for each element `i` in `0..n` where `i >=
/// self.num_leaf_nodes()`.
pub fn get_leaf_nodes(&self, n: usize) -> Vec<LeafNode> {
let mut running_offset = self.offset + self.num_internal_nodes();
@ -74,10 +105,12 @@ impl BTreeOverlay {
leaf_nodes
}
/// Returns the number of leaf nodes in the tree.
pub fn num_leaf_nodes(&self) -> usize {
self.lengths.len().next_power_of_two()
}
/// Returns the number of leaves in the tree which are padding.
pub fn num_padding_leaves(&self) -> usize {
self.num_leaf_nodes() - self.lengths.len()
}
@ -90,31 +123,39 @@ impl BTreeOverlay {
2 * self.num_leaf_nodes() - 1
}
/// Returns the number of internal (non-leaf) nodes in the tree.
pub fn num_internal_nodes(&self) -> usize {
self.num_leaf_nodes() - 1
}
/// Returns the chunk of the first node of the tree.
fn first_node(&self) -> usize {
self.offset
}
/// Returns the root chunk of the tree (the zero-th node).
pub fn root(&self) -> usize {
self.first_node()
}
/// Returns the first chunk outside of the boundary of this tree. It is the root node chunk
/// plus the total number of chunks in the tree.
pub fn next_node(&self) -> usize {
self.first_node() + self.num_internal_nodes() + self.num_leaf_nodes() - self.lengths.len()
+ self.lengths.iter().sum::<usize>()
}
/// Returns the height of the tree, where a tree with a single node has a height of `0`.
pub fn height(&self) -> usize {
self.num_leaf_nodes().trailing_zeros() as usize
}
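A worked example of the node/chunk arithmetic above, written as a standalone sketch with plain variables that mirror the formulas (this is not the real `BTreeOverlay` API). For leaf lengths `[1, 1, 3]` and an offset of `0`:

fn main() {
    let offset: usize = 0;
    let lengths: Vec<usize> = vec![1, 1, 3];

    let num_leaf_nodes = lengths.len().next_power_of_two();  // 4
    let num_internal_nodes = num_leaf_nodes - 1;              // 3
    let num_padding_leaves = num_leaf_nodes - lengths.len();  // 1
    let height = num_leaf_nodes.trailing_zeros() as usize;    // 2
    let next_node = offset
        + num_internal_nodes
        + num_padding_leaves
        + lengths.iter().sum::<usize>();                       // 0 + 3 + 1 + 5 = 9

    assert_eq!(
        (num_leaf_nodes, num_internal_nodes, num_padding_leaves, height, next_node),
        (4, 3, 1, 2, 9)
    );
}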
/// Returns the range of chunks that belong to the internal nodes of the tree.
pub fn internal_chunk_range(&self) -> Range<usize> {
self.offset..self.offset + self.num_internal_nodes()
}
/// Returns all of the chunks that are encompassed by the tree.
pub fn chunk_range(&self) -> Range<usize> {
self.first_node()..self.next_node()
}
@ -127,10 +168,14 @@ impl BTreeOverlay {
self.next_node() - self.first_node()
}
/// Returns the first chunk of the first leaf node in the tree.
pub fn first_leaf_node(&self) -> usize {
self.offset + self.num_internal_nodes()
}
/// Returns the chunks for some given parent node.
///
/// Note: it is a parent _node_ not a parent _chunk_.
pub fn child_chunks(&self, parent: usize) -> (usize, usize) {
let children = children(parent);
@ -142,7 +187,7 @@ impl BTreeOverlay {
}
}
/// (parent, (left_child, right_child))
/// Returns a vec of (parent_chunk, (left_child_chunk, right_child_chunk)).
pub fn internal_parents_and_children(&self) -> Vec<(usize, (usize, usize))> {
let mut chunks = Vec::with_capacity(self.num_nodes());
chunks.append(&mut self.internal_node_chunks());
@ -156,17 +201,17 @@ impl BTreeOverlay {
.collect()
}
// Returns a `Vec` of chunk indices for each internal node of the tree.
/// Returns a vec of chunk indices for each internal node of the tree.
pub fn internal_node_chunks(&self) -> Vec<usize> {
(self.offset..self.offset + self.num_internal_nodes()).collect()
}
// Returns a `Vec` of the first chunk index for each leaf node of the tree.
/// Returns a vec of the first chunk for each leaf node of the tree.
pub fn leaf_node_chunks(&self) -> Vec<usize> {
self.n_leaf_node_chunks(self.num_leaf_nodes())
}
// Returns a `Vec` of the first chunk index for the first `n` leaf nodes of the tree.
/// Returns a vec of the first chunk index for the first `n` leaf nodes of the tree.
fn n_leaf_node_chunks(&self, n: usize) -> Vec<usize> {
let mut chunks = Vec::with_capacity(n);

View File

@ -66,10 +66,10 @@ pub fn new_tree_hash_cache<T: CachedTreeHash>(
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let subtrees = vec
.iter()
.map(|item| TreeHashCache::new(item, depth + 1))
.map(|item| TreeHashCache::new_at_depth(item, depth + 1))
.collect::<Result<Vec<TreeHashCache>, _>>()?;
TreeHashCache::from_leaves_and_subtrees(&vec, subtrees, depth)
TreeHashCache::from_subtrees(&vec, subtrees, depth)
}
}?;
@ -301,7 +301,7 @@ fn splice_in_new_tree<T>(
where
T: CachedTreeHash,
{
let (bytes, mut bools, schemas) = TreeHashCache::new(item, depth)?.into_components();
let (bytes, mut bools, schemas) = TreeHashCache::new_at_depth(item, depth)?.into_components();
// Record the number of schemas, this will be used later in the fn.
let num_schemas = schemas.len();

View File

@ -1,16 +1,52 @@
//! Performs cached merkle-hashing adhering to the Ethereum 2.0 specification defined
//! [here](https://github.com/ethereum/eth2.0-specs/blob/v0.5.1/specs/simple-serialize.md#merkleization).
//!
//! Caching allows for reduced hashing when some object has only been partially modified. This
//! allows for significant CPU-time savings (at the cost of additional storage). For example,
//! determining the root of a list of 1024 items with a single modification has been observed to
//! run in 1/25th of the time of a full merkle hash.
//!
//! # Example:
//!
//! ```
//! use cached_tree_hash::TreeHashCache;
//! use tree_hash_derive::{TreeHash, CachedTreeHash};
//!
//! #[derive(TreeHash, CachedTreeHash)]
//! struct Foo {
//! bar: u64,
//! baz: Vec<u64>
//! }
//!
//! let mut foo = Foo {
//! bar: 1,
//! baz: vec![0, 1, 2]
//! };
//!
//! let mut cache = TreeHashCache::new(&foo).unwrap();
//!
//! foo.baz[1] = 0;
//!
//! cache.update(&foo).unwrap();
//!
//! println!("Root is: {:?}", cache.tree_hash_root().unwrap());
//! ```
use hashing::hash;
use std::ops::Range;
use tree_hash::{TreeHash, TreeHashType, BYTES_PER_CHUNK, HASHSIZE};
mod btree_overlay;
mod errors;
pub mod impls;
mod impls;
pub mod merkleize;
mod resize;
mod tree_hash_cache;
pub use btree_overlay::{BTreeOverlay, BTreeSchema};
pub use errors::Error;
pub use impls::vec;
pub use tree_hash_cache::TreeHashCache;
pub trait CachedTreeHash: TreeHash {
@ -25,34 +61,8 @@ pub trait CachedTreeHash: TreeHash {
fn update_tree_hash_cache(&self, cache: &mut TreeHashCache) -> Result<(), Error>;
}
#[derive(Debug, PartialEq)]
pub struct CachedTreeHasher {
pub cache: TreeHashCache,
}
impl CachedTreeHasher {
pub fn new<T>(item: &T) -> Result<Self, Error>
where
T: CachedTreeHash,
{
Ok(Self {
cache: TreeHashCache::new(item, 0)?,
})
}
pub fn update<T>(&mut self, item: &T) -> Result<(), Error>
where
T: CachedTreeHash,
{
self.cache.update(item)
}
pub fn tree_hash_root(&self) -> Result<Vec<u8>, Error> {
// Return the root of the cache -- the merkle root.
Ok(self.cache.root()?.to_vec())
}
}
/// Implements `CachedTreeHash` on `$type` as a fixed-length tree-hash vector of the ssz encoding
/// of `$type`.
#[macro_export]
macro_rules! cached_tree_hash_ssz_encoding_as_vector {
($type: ident, $num_bytes: expr) => {
@ -61,10 +71,8 @@ macro_rules! cached_tree_hash_ssz_encoding_as_vector {
&self,
depth: usize,
) -> Result<cached_tree_hash::TreeHashCache, cached_tree_hash::Error> {
let (cache, _schema) = cached_tree_hash::impls::vec::new_tree_hash_cache(
&ssz::ssz_encode(self),
depth,
)?;
let (cache, _schema) =
cached_tree_hash::vec::new_tree_hash_cache(&ssz::ssz_encode(self), depth)?;
Ok(cache)
}
@ -79,10 +87,7 @@ macro_rules! cached_tree_hash_ssz_encoding_as_vector {
&self,
cache: &mut cached_tree_hash::TreeHashCache,
) -> Result<(), cached_tree_hash::Error> {
cached_tree_hash::impls::vec::update_tree_hash_cache(
&ssz::ssz_encode(self),
cache,
)?;
cached_tree_hash::vec::update_tree_hash_cache(&ssz::ssz_encode(self), cache)?;
Ok(())
}
@ -90,6 +95,8 @@ macro_rules! cached_tree_hash_ssz_encoding_as_vector {
};
}
/// Implements `CachedTreeHash` on `$type` as a variable-length tree-hash list of the result of
/// calling `.as_bytes()` on `$type`.
#[macro_export]
macro_rules! cached_tree_hash_bytes_as_list {
($type: ident) => {
@ -101,7 +108,7 @@ macro_rules! cached_tree_hash_bytes_as_list {
let bytes = self.to_bytes();
let (mut cache, schema) =
cached_tree_hash::impls::vec::new_tree_hash_cache(&bytes, depth)?;
cached_tree_hash::vec::new_tree_hash_cache(&bytes, depth)?;
cache.add_length_nodes(schema.into_overlay(0).chunk_range(), bytes.len())?;
@ -115,7 +122,7 @@ macro_rules! cached_tree_hash_bytes_as_list {
fn tree_hash_cache_schema(&self, depth: usize) -> cached_tree_hash::BTreeSchema {
let bytes = self.to_bytes();
cached_tree_hash::impls::vec::produce_schema(&bytes, depth)
cached_tree_hash::vec::produce_schema(&bytes, depth)
}
fn update_tree_hash_cache(
@ -128,8 +135,7 @@ macro_rules! cached_tree_hash_bytes_as_list {
cache.chunk_index += 1;
// Update the cache, returning the new overlay.
let new_overlay =
cached_tree_hash::impls::vec::update_tree_hash_cache(&bytes, cache)?;
let new_overlay = cached_tree_hash::vec::update_tree_hash_cache(&bytes, cache)?;
// Mix in length
cache.mix_in_length(new_overlay.chunk_range(), bytes.len())?;

View File

@ -35,6 +35,7 @@ pub fn merkleize(values: Vec<u8>) -> Vec<u8> {
o
}
/// Ensures that the given `bytes` contain a power-of-two number of chunks, padding with zeros if not.
pub fn sanitise_bytes(mut bytes: Vec<u8>) -> Vec<u8> {
let present_leaves = num_unsanitized_leaves(bytes.len());
let required_leaves = present_leaves.next_power_of_two();
@ -46,6 +47,7 @@ pub fn sanitise_bytes(mut bytes: Vec<u8>) -> Vec<u8> {
bytes
}
/// Pads out `bytes` with zeros so that it holds a whole number of chunks for `num_leaves` leaves, rounded up to a power of two.
pub fn pad_for_leaf_count(num_leaves: usize, bytes: &mut Vec<u8>) {
let required_leaves = num_leaves.next_power_of_two();
@ -59,9 +61,10 @@ fn last_leaf_needs_padding(num_bytes: usize) -> bool {
num_bytes % HASHSIZE != 0
}
/// Rounds up
pub fn num_unsanitized_leaves(num_bytes: usize) -> usize {
(num_bytes + HASHSIZE - 1) / HASHSIZE
/// Returns the number of leaves for a given `bytes_len` number of bytes, rounding up if
/// `bytes_len` is not a clean multiple of the chunk size.
pub fn num_unsanitized_leaves(bytes_len: usize) -> usize {
(bytes_len + HASHSIZE - 1) / HASHSIZE
}
fn num_bytes(num_leaves: usize) -> usize {
@ -72,7 +75,9 @@ fn num_nodes(num_leaves: usize) -> usize {
2 * num_leaves - 1
}
pub fn num_sanitized_leaves(num_bytes: usize) -> usize {
let leaves = (num_bytes + HASHSIZE - 1) / HASHSIZE;
/// Returns the power-of-two number of leaves that would result from the given `bytes_len` number
/// of bytes.
pub fn num_sanitized_leaves(bytes_len: usize) -> usize {
let leaves = (bytes_len + HASHSIZE - 1) / HASHSIZE;
leaves.next_power_of_two()
}
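A quick sketch of the leaf-count arithmetic above, mirroring the two functions shown (`HASHSIZE` is the 32-byte chunk size):

const HASHSIZE: usize = 32;

fn num_unsanitized_leaves(bytes_len: usize) -> usize {
    (bytes_len + HASHSIZE - 1) / HASHSIZE
}

fn num_sanitized_leaves(bytes_len: usize) -> usize {
    num_unsanitized_leaves(bytes_len).next_power_of_two()
}

fn main() {
    // 150 bytes round up to 5 chunks, which are then padded to the next power of two (8 leaves).
    assert_eq!(num_unsanitized_leaves(150), 5);
    assert_eq!(num_sanitized_leaves(150), 8);
}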

View File

@ -4,20 +4,35 @@ use super::*;
use crate::merkleize::{merkleize, pad_for_leaf_count};
use int_to_bytes::int_to_bytes32;
/// Provides cached tree hashing for some object implementing `CachedTreeHash`.
///
/// Caching allows for doing minimal internal-node hashing when an object has only been partially
/// changed.
///
/// See the crate root for an example.
#[derive(Debug, PartialEq, Clone)]
pub struct TreeHashCache {
pub cache: Vec<u8>,
/// Stores the binary-tree in 32-byte chunks.
pub bytes: Vec<u8>,
/// Has one entry per chunk of `self.bytes`, indicating whether that chunk is dirty.
pub chunk_modified: Vec<bool>,
/// Contains a schema for each variable-length item stored in the cache.
pub schemas: Vec<BTreeSchema>,
/// A counter used during updates.
pub chunk_index: usize,
/// A counter used during updates.
pub schema_index: usize,
}
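The fields above imply a simple chunk-to-byte mapping: entry `i` of `chunk_modified` describes bytes `i * 32..(i + 1) * 32` of `bytes`. A tiny illustrative sketch of that mapping (not part of the diff; it mirrors how `get_chunk` and `modify_chunk` index the cache):

const BYTES_PER_CHUNK: usize = 32;

fn chunk_byte_range(chunk: usize) -> std::ops::Range<usize> {
    chunk * BYTES_PER_CHUNK..(chunk + 1) * BYTES_PER_CHUNK
}

fn main() {
    assert_eq!(chunk_byte_range(0), 0..32);
    assert_eq!(chunk_byte_range(3), 96..128);
}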
impl Default for TreeHashCache {
/// Create an empty cache.
///
/// Note: an empty cache is effectively useless; an error will be raised if `self.update` is
/// called.
fn default() -> TreeHashCache {
TreeHashCache {
cache: vec![],
bytes: vec![],
chunk_modified: vec![],
schemas: vec![],
chunk_index: 0,
@ -26,20 +41,34 @@ impl Default for TreeHashCache {
}
}
impl Into<Vec<u8>> for TreeHashCache {
fn into(self) -> Vec<u8> {
self.cache
}
}
impl TreeHashCache {
pub fn new<T>(item: &T, depth: usize) -> Result<Self, Error>
/// Instantiates a new cache from `item` at a depth of `0`.
///
/// The returned cache is fully-built and will return an accurate tree-hash root.
pub fn new<T>(item: &T) -> Result<Self, Error>
where
T: CachedTreeHash,
{
Self::new_at_depth(item, 0)
}
/// Instantiates a new cache from `item` at the specified `depth`.
///
/// The returned cache is fully-built and will return an accurate tree-hash root.
pub fn new_at_depth<T>(item: &T, depth: usize) -> Result<Self, Error>
where
T: CachedTreeHash,
{
item.new_tree_hash_cache(depth)
}
/// Updates the cache with `item`.
///
/// `item` _must_ be of the same type as the `item` used to build the cache, otherwise an error
/// may be returned.
///
/// After calling `update`, the cache will return an accurate tree-hash root using
/// `self.tree_hash_root()`.
pub fn update<T>(&mut self, item: &T) -> Result<(), Error>
where
T: CachedTreeHash,
@ -53,11 +82,10 @@ impl TreeHashCache {
}
}
pub fn from_leaves_and_subtrees<T>(
item: &T,
leaves_and_subtrees: Vec<Self>,
depth: usize,
) -> Result<Self, Error>
/// Builds a new cache for `item`, given that `subtrees` contains a `Self` for each field/item of `item`.
///
/// Each `subtree` in `subtrees` will become a leaf-node of the merkle-tree of `item`.
pub fn from_subtrees<T>(item: &T, subtrees: Vec<Self>, depth: usize) -> Result<Self, Error>
where
T: CachedTreeHash,
{
@ -65,20 +93,18 @@ impl TreeHashCache {
// Note how many leaves were provided. If it is not a power-of-two, we'll need to pad it out
// later.
let num_provided_leaf_nodes = leaves_and_subtrees.len();
let num_provided_leaf_nodes = subtrees.len();
// Allocate enough bytes to store the internal nodes and the leaves and subtrees, then fill
// all the to-be-built internal nodes with zeros and append the leaves and subtrees.
let internal_node_bytes = overlay.num_internal_nodes() * BYTES_PER_CHUNK;
let leaves_and_subtrees_bytes = leaves_and_subtrees
.iter()
.fold(0, |acc, t| acc + t.bytes_len());
let mut cache = Vec::with_capacity(leaves_and_subtrees_bytes + internal_node_bytes);
cache.resize(internal_node_bytes, 0);
let subtrees_bytes = subtrees.iter().fold(0, |acc, t| acc + t.bytes.len());
let mut bytes = Vec::with_capacity(subtrees_bytes + internal_node_bytes);
bytes.resize(internal_node_bytes, 0);
// Allocate enough bytes to store all the leaves.
let mut leaves = Vec::with_capacity(overlay.num_leaf_nodes() * HASHSIZE);
let mut schemas = Vec::with_capacity(leaves_and_subtrees.len());
let mut schemas = Vec::with_capacity(subtrees.len());
if T::tree_hash_type() == TreeHashType::List {
schemas.push(overlay.into());
@ -86,32 +112,36 @@ impl TreeHashCache {
// Iterate through all of the leaves/subtrees, adding their root as a leaf node and then
// concatenating their merkle trees.
for t in leaves_and_subtrees {
leaves.append(&mut t.root()?.to_vec());
for t in subtrees {
leaves.append(&mut t.tree_hash_root()?.to_vec());
let (mut bytes, _bools, mut t_schemas) = t.into_components();
cache.append(&mut bytes);
let (mut t_bytes, _bools, mut t_schemas) = t.into_components();
bytes.append(&mut t_bytes);
schemas.append(&mut t_schemas);
}
// Pad the leaves to an even power-of-two, using zeros.
pad_for_leaf_count(num_provided_leaf_nodes, &mut cache);
pad_for_leaf_count(num_provided_leaf_nodes, &mut bytes);
// Merkleize the leaves, then split the leaf nodes off them. Then, replace all-zeros
// internal nodes created earlier with the internal nodes generated by `merkleize`.
let mut merkleized = merkleize(leaves);
merkleized.split_off(internal_node_bytes);
cache.splice(0..internal_node_bytes, merkleized);
bytes.splice(0..internal_node_bytes, merkleized);
Ok(Self {
chunk_modified: vec![true; cache.len() / BYTES_PER_CHUNK],
cache,
chunk_modified: vec![true; bytes.len() / BYTES_PER_CHUNK],
bytes,
schemas,
chunk_index: 0,
schema_index: 0,
})
}
/// Instantiate a new cache from the pre-built `bytes` where each element of `self.chunk_modified`
/// will be set to `initial_modified_state`.
///
/// Note: `bytes.len()` must be a multiple of 32.
pub fn from_bytes(
bytes: Vec<u8>,
initial_modified_state: bool,
@ -128,17 +158,22 @@ impl TreeHashCache {
Ok(Self {
chunk_modified: vec![initial_modified_state; bytes.len() / BYTES_PER_CHUNK],
cache: bytes,
bytes,
schemas,
chunk_index: 0,
schema_index: 0,
})
}
/// Returns `true` if this cache is empty (i.e., it has never been built for some item).
///
/// Note: an empty cache is effectively useless; an error will be raised if `self.update` is
/// called.
pub fn is_empty(&self) -> bool {
self.chunk_modified.is_empty()
}
/// Return an overlay, built from the schema at `schema_index` with an offset of `chunk_index`.
pub fn get_overlay(
&self,
schema_index: usize,
@ -152,6 +187,9 @@ impl TreeHashCache {
.into_overlay(chunk_index))
}
/// Resets the per-update counters, allowing a new update to start.
///
/// Note: this does _not_ delete the contents of the cache.
pub fn reset_modifications(&mut self) {
// Reset the per-hash counters.
self.chunk_index = 0;
@ -162,9 +200,14 @@ impl TreeHashCache {
}
}
/// Replace the schema at `schema_index` with the schema derived from `new_overlay`.
///
/// If the `new_overlay` schema has a different number of internal nodes to the schema at
/// `schema_index`, the cache will be updated to add/remove these new internal nodes.
pub fn replace_overlay(
&mut self,
schema_index: usize,
// TODO: remove chunk index (if possible)
chunk_index: usize,
new_overlay: BTreeOverlay,
) -> Result<BTreeOverlay, Error> {
@ -225,6 +268,9 @@ impl TreeHashCache {
Ok(old_schema.into_overlay(chunk_index))
}
/// Remove all of the child schemas following `schema_index`.
///
/// Schema `a` is a child of schema `b` if `a.depth > b.depth`.
pub fn remove_proceeding_child_schemas(&mut self, schema_index: usize, depth: usize) {
let end = self
.schemas
@ -237,6 +283,8 @@ impl TreeHashCache {
self.schemas.splice(schema_index..end, vec![]);
}
/// Iterate through the internal node chunks of `overlay`, updating each chunk with the
/// merkle-root of its children if either of those children are dirty.
pub fn update_internal_nodes(&mut self, overlay: &BTreeOverlay) -> Result<(), Error> {
for (parent, children) in overlay.internal_parents_and_children().into_iter().rev() {
if self.either_modified(children)? {
@ -247,37 +295,34 @@ impl TreeHashCache {
Ok(())
}
fn bytes_len(&self) -> usize {
self.cache.len()
}
/// Returns the tree-hash root of the cache.
pub fn tree_hash_root(&self) -> Result<&[u8], Error> {
self.root()
}
pub fn root(&self) -> Result<&[u8], Error> {
if self.is_empty() {
Err(Error::CacheNotInitialized)
} else {
self.cache
self.bytes
.get(0..HASHSIZE)
.ok_or_else(|| Error::NoBytesForRoot)
}
}
/// Splices the given `bytes` over `self.bytes` and `bools` over `self.chunk_modified` at the
/// specified `chunk_range`.
pub fn splice(&mut self, chunk_range: Range<usize>, bytes: Vec<u8>, bools: Vec<bool>) {
// Update the `chunk_modified` vec, marking all spliced-in nodes as changed.
self.chunk_modified.splice(chunk_range.clone(), bools);
self.cache
self.bytes
.splice(node_range_to_byte_range(&chunk_range), bytes);
}
/// If the bytes at `chunk` are not the same as `to`, `self.bytes` is updated and
/// `self.chunk_modified` is set to `true`.
pub fn maybe_update_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> {
let start = chunk * BYTES_PER_CHUNK;
let end = start + BYTES_PER_CHUNK;
if !self.chunk_equals(chunk, to)? {
self.cache
self.bytes
.get_mut(start..end)
.ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?
.copy_from_slice(to);
@ -287,18 +332,20 @@ impl TreeHashCache {
Ok(())
}
/// Returns the slices of `self.bytes` and `self.chunk_modified` at the given `chunk_range`.
fn slices(&self, chunk_range: Range<usize>) -> Option<(&[u8], &[bool])> {
Some((
self.cache.get(node_range_to_byte_range(&chunk_range))?,
self.bytes.get(node_range_to_byte_range(&chunk_range))?,
self.chunk_modified.get(chunk_range)?,
))
}
/// Updates `self.bytes` at `chunk` and sets `self.chunk_modified` for the `chunk` to `true`.
pub fn modify_chunk(&mut self, chunk: usize, to: &[u8]) -> Result<(), Error> {
let start = chunk * BYTES_PER_CHUNK;
let end = start + BYTES_PER_CHUNK;
self.cache
self.bytes
.get_mut(start..end)
.ok_or_else(|| Error::NoBytesForChunk(chunk))?
.copy_from_slice(to);
@ -308,20 +355,23 @@ impl TreeHashCache {
Ok(())
}
/// Returns the bytes at `chunk`.
fn get_chunk(&self, chunk: usize) -> Result<&[u8], Error> {
let start = chunk * BYTES_PER_CHUNK;
let end = start + BYTES_PER_CHUNK;
Ok(self
.cache
.bytes
.get(start..end)
.ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))?)
}
/// Returns `true` if the bytes at `chunk` are equal to `other`.
fn chunk_equals(&mut self, chunk: usize, other: &[u8]) -> Result<bool, Error> {
Ok(self.get_chunk(chunk)? == other)
}
/// Returns `true` if `chunk` is dirty.
pub fn changed(&self, chunk: usize) -> Result<bool, Error> {
self.chunk_modified
.get(chunk)
@ -329,10 +379,12 @@ impl TreeHashCache {
.ok_or_else(|| Error::NoModifiedFieldForChunk(chunk))
}
/// Returns `true` if either of the `children` chunks is dirty.
fn either_modified(&self, children: (usize, usize)) -> Result<bool, Error> {
Ok(self.changed(children.0)? | self.changed(children.1)?)
}
/// Returns the hash of the concatenation of the given `children`.
pub fn hash_children(&self, children: (usize, usize)) -> Result<Vec<u8>, Error> {
let mut child_bytes = Vec::with_capacity(BYTES_PER_CHUNK * 2);
child_bytes.append(&mut self.get_chunk(children.0)?.to_vec());
@ -341,6 +393,7 @@ impl TreeHashCache {
Ok(hash(&child_bytes))
}
/// Adds a chunk before and after the given `chunk` range and calls `self.mix_in_length()`.
pub fn add_length_nodes(
&mut self,
chunk_range: Range<usize>,
@ -351,13 +404,13 @@ impl TreeHashCache {
let byte_range = node_range_to_byte_range(&chunk_range);
// Add the last node.
self.cache
self.bytes
.splice(byte_range.end..byte_range.end, vec![0; HASHSIZE]);
self.chunk_modified
.splice(chunk_range.end..chunk_range.end, vec![false]);
// Add the first node.
self.cache
self.bytes
.splice(byte_range.start..byte_range.start, vec![0; HASHSIZE]);
self.chunk_modified
.splice(chunk_range.start..chunk_range.start, vec![false]);
@ -367,6 +420,8 @@ impl TreeHashCache {
Ok(())
}
/// Sets `chunk_range.end + 1` equal to the little-endian serialization of `length`. Sets
/// `chunk_range.start - 1` equal to `self.hash_children(chunk_range.start, chunk_range.end + 1)`.
pub fn mix_in_length(&mut self, chunk_range: Range<usize>, length: usize) -> Result<(), Error> {
// Update the length chunk.
self.maybe_update_chunk(chunk_range.end, &int_to_bytes32(length as u64))?;
@ -380,8 +435,9 @@ impl TreeHashCache {
Ok(())
}
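For reference, "mixing in the length" amounts to hashing the subtree root together with the 32-byte little-endian encoding of the length. A hedged, standalone sketch of that operation (the `hash` closure stands in for `hashing::hash`; this is not the chunk-level implementation above):

fn int_to_bytes32(n: u64) -> Vec<u8> {
    let mut bytes = n.to_le_bytes().to_vec();
    bytes.resize(32, 0);
    bytes
}

fn mix_in_length(root: &[u8], length: usize, hash: impl Fn(&[u8]) -> Vec<u8>) -> Vec<u8> {
    let mut preimage = Vec::with_capacity(64);
    preimage.extend_from_slice(root);
    preimage.extend_from_slice(&int_to_bytes32(length as u64));
    hash(&preimage)
}

fn main() {
    // A dummy "hash" for illustration only; the real code uses SHA-256 via `hashing::hash`.
    let dummy_hash = |bytes: &[u8]| bytes.to_vec();
    assert_eq!(mix_in_length(&[0u8; 32], 3, dummy_hash).len(), 64);
}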
/// Returns `(self.bytes, self.chunk_modified, self.schemas)`.
pub fn into_components(self) -> (Vec<u8>, Vec<bool>, Vec<BTreeSchema>) {
(self.cache, self.chunk_modified, self.schemas)
(self.bytes, self.chunk_modified, self.schemas)
}
}

View File

@ -9,7 +9,7 @@ fn modifications() {
let vec: Vec<Hash256> = (0..n).map(|_| Hash256::random()).collect();
let mut cache = TreeHashCache::new(&vec, 0).unwrap();
let mut cache = TreeHashCache::new(&vec).unwrap();
cache.update(&vec).unwrap();
let modifications = cache.chunk_modified.iter().filter(|b| **b).count();
@ -36,60 +36,57 @@ fn test_routine<T>(original: T, modified: Vec<T>)
where
T: CachedTreeHash + std::fmt::Debug,
{
let mut hasher = CachedTreeHasher::new(&original).unwrap();
let mut cache = TreeHashCache::new(&original).unwrap();
let standard_root = original.tree_hash_root();
let cached_root = hasher.tree_hash_root().unwrap();
let cached_root = cache.tree_hash_root().unwrap();
assert_eq!(standard_root, cached_root, "Initial cache build failed.");
for (i, modified) in modified.iter().enumerate() {
println!("-- Start of modification {} --", i);
// Update the existing hasher.
hasher
cache
.update(modified)
.expect(&format!("Modification {}", i));
// Create a new hasher from the "modified" struct.
let modified_hasher = CachedTreeHasher::new(modified).unwrap();
let modified_cache = TreeHashCache::new(modified).unwrap();
assert_eq!(
hasher.cache.chunk_modified.len(),
modified_hasher.cache.chunk_modified.len(),
cache.chunk_modified.len(),
modified_cache.chunk_modified.len(),
"Number of chunks is different"
);
assert_eq!(
hasher.cache.cache.len(),
modified_hasher.cache.cache.len(),
cache.bytes.len(),
modified_cache.bytes.len(),
"Number of bytes is different"
);
assert_eq!(
hasher.cache.cache, modified_hasher.cache.cache,
"Bytes are different"
);
assert_eq!(cache.bytes, modified_cache.bytes, "Bytes are different");
assert_eq!(
hasher.cache.schemas.len(),
modified_hasher.cache.schemas.len(),
cache.schemas.len(),
modified_cache.schemas.len(),
"Number of schemas is different"
);
assert_eq!(
hasher.cache.schemas, modified_hasher.cache.schemas,
cache.schemas, modified_cache.schemas,
"Schemas are different"
);
// Test the root generated by the updated hasher matches a non-cached tree hash root.
let standard_root = modified.tree_hash_root();
let cached_root = hasher
let cached_root = cache
.tree_hash_root()
.expect(&format!("Modification {}", i));
assert_eq!(
standard_root, cached_root,
"Modification {} failed. \n Cache: {:?}",
i, hasher
i, cache
);
}
}
@ -194,20 +191,20 @@ fn test_shrinking_vec_of_vec() {
let original: Vec<Vec<u64>> = vec![vec![1], vec![2], vec![3], vec![4], vec![5]];
let modified: Vec<Vec<u64>> = original[0..3].to_vec();
let new_hasher = CachedTreeHasher::new(&modified).unwrap();
let new_cache = TreeHashCache::new(&modified).unwrap();
let mut modified_hasher = CachedTreeHasher::new(&original).unwrap();
modified_hasher.update(&modified).unwrap();
let mut modified_cache = TreeHashCache::new(&original).unwrap();
modified_cache.update(&modified).unwrap();
assert_eq!(
new_hasher.cache.schemas.len(),
modified_hasher.cache.schemas.len(),
new_cache.schemas.len(),
modified_cache.schemas.len(),
"Schema count is different"
);
assert_eq!(
new_hasher.cache.chunk_modified.len(),
modified_hasher.cache.chunk_modified.len(),
new_cache.chunk_modified.len(),
modified_cache.chunk_modified.len(),
"Chunk count is different"
);
}
@ -601,7 +598,7 @@ fn generic_test(index: usize) {
d: 4,
};
let mut cache = TreeHashCache::new(&inner, 0).unwrap();
let mut cache = TreeHashCache::new(&inner).unwrap();
let changed_inner = match index {
0 => Inner {
@ -636,7 +633,7 @@ fn generic_test(index: usize) {
let expected = merkleize(join(data));
let cache_bytes: Vec<u8> = cache.into();
let (cache_bytes, _, _) = cache.into_components();
assert_eq!(expected, cache_bytes);
}
@ -666,9 +663,9 @@ fn inner_builds() {
d: 4,
};
let cache: Vec<u8> = TreeHashCache::new(&inner, 0).unwrap().into();
let (cache_bytes, _, _) = TreeHashCache::new(&inner).unwrap().into_components();
assert_eq!(expected, cache);
assert_eq!(expected, cache_bytes);
}
fn join(many: Vec<Vec<u8>>) -> Vec<u8> {

View File

@ -58,7 +58,7 @@ pub fn subtree_derive(input: TokenStream) -> TokenStream {
let output = quote! {
impl cached_tree_hash::CachedTreeHash for #name {
fn new_tree_hash_cache(&self, depth: usize) -> Result<cached_tree_hash::TreeHashCache, cached_tree_hash::Error> {
let tree = cached_tree_hash::TreeHashCache::from_leaves_and_subtrees(
let tree = cached_tree_hash::TreeHashCache::from_subtrees(
self,
vec![
#(

View File

@ -1,4 +1,4 @@
use cached_tree_hash::{CachedTreeHash, CachedTreeHasher};
use cached_tree_hash::{CachedTreeHash, TreeHashCache};
use tree_hash::{merkleize::merkle_root, SignedRoot, TreeHash};
use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash};
@ -12,16 +12,16 @@ pub struct Inner {
fn test_standard_and_cached<T: CachedTreeHash>(original: &T, modified: &T) {
// let mut cache = original.new_tree_hash_cache().unwrap();
let mut hasher = CachedTreeHasher::new(original).unwrap();
let mut cache = TreeHashCache::new(original).unwrap();
let standard_root = original.tree_hash_root();
let cached_root = hasher.tree_hash_root().unwrap();
let cached_root = cache.tree_hash_root().unwrap();
assert_eq!(standard_root, cached_root);
// Test after a modification
hasher.update(modified).unwrap();
cache.update(modified).unwrap();
let standard_root = modified.tree_hash_root();
let cached_root = hasher.tree_hash_root().unwrap();
let cached_root = cache.tree_hash_root().unwrap();
assert_eq!(standard_root, cached_root);
}
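Taken together, the test changes above show the public API after this commit: callers use `TreeHashCache` directly instead of the removed `CachedTreeHasher` wrapper. A hedged before/after sketch of a caller (illustrative variable names):

// Before this commit:
//     let mut hasher = CachedTreeHasher::new(&item)?;
//     hasher.update(&modified)?;
//     let root: Vec<u8> = hasher.tree_hash_root()?;
//
// After this commit:
//     let mut cache = TreeHashCache::new(&item)?;
//     cache.update(&modified)?;
//     let root: &[u8] = cache.tree_hash_root()?;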