Padding efficient merkle root algo (#436)

* Add initial work on padding efficient merkle roots * Improve merkleize_padded * Improve tree_hash crate -- fix bugs, docs * Update codebase for tree_hash API change * Remove dbg statements, fix import error * Fix clippy lints, doc error * Tidy tree hash comments * Increase tree_hash max tree height * Fix PR review comments * Fix typos * Fix cache access off-by-one in tree hash * Set max tree depth to 48 (from 64)
2019-07-16 14:40:56 +10:00 · 2019-07-16 14:40:56 +10:00 · 88c6d15c32
commit 88c6d15c32
parent af499c0b8c
7 changed files with 403 additions and 12 deletions
--- a/eth2/utils/tree_hash/Cargo.toml
+++ b/eth2/utils/tree_hash/Cargo.toml
@ -5,9 +5,11 @@ authors = ["Paul Hauner <paul@paulhauner.com>"]
 edition = "2018"

 [dev-dependencies]
+rand = "0.7"
 tree_hash_derive = { path = "../tree_hash_derive" }

 [dependencies]
 ethereum-types = "0.5"
 hashing = { path = "../hashing" }
 int_to_bytes = { path = "../int_to_bytes" }
+lazy_static = "0.1"
--- a/eth2/utils/tree_hash/src/impls.rs
+++ b/eth2/utils/tree_hash/src/impls.rs
@ -1,5 +1,5 @@
 use super::*;
-use crate::merkleize::merkle_root;
+use crate::merkle_root;
 use ethereum_types::H256;
 use hashing::hash;
 use int_to_bytes::int_to_bytes32;
--- a/eth2/utils/tree_hash/src/lib.rs
+++ b/eth2/utils/tree_hash/src/lib.rs
@ -1,5 +1,17 @@
+#[macro_use]
+extern crate lazy_static;
+
 pub mod impls;
-pub mod merkleize;
+mod merkleize_padded;
+mod merkleize_standard;
+
+pub use merkleize_padded::merkleize_padded;
+pub use merkleize_standard::merkleize_standard;
+
+/// Returns the Merkle root of `bytes` without requesting any minimum number of leaves.
+///
+/// Alias to `merkleize_padded(bytes, 0)`.
+pub fn merkle_root(bytes: &[u8]) -> Vec<u8> {
+    // `bytes` is already a slice reference; pass it straight through instead of re-borrowing.
+    merkleize_padded(bytes, 0)
+}

 pub const BYTES_PER_CHUNK: usize = 32;
 pub const HASHSIZE: usize = 32;
@ -44,7 +56,7 @@ macro_rules! tree_hash_ssz_encoding_as_vector {
            }

            fn tree_hash_root(&self) -> Vec<u8> {
-                tree_hash::merkleize::merkle_root(&ssz::ssz_encode(self))
+                tree_hash::merkle_root(&ssz::ssz_encode(self))
            }
        }
    };
--- a/eth2/utils/tree_hash/src/merkleize_padded.rs
+++ b/eth2/utils/tree_hash/src/merkleize_padded.rs
@ -0,0 +1,366 @@
+use super::BYTES_PER_CHUNK;
+use hashing::hash;
+
+/// The greatest height for which a padding-node hash is cached.
+///
+/// `ZERO_HASHES` stores `MAX_TREE_DEPTH + 1` entries (heights `0..=MAX_TREE_DEPTH`), and
+/// `get_zero_hash` panics if it is asked for a padding node above this height.
+///
+/// It is set to 48 as we expect it to be sufficiently high that we won't exceed it.
+pub const MAX_TREE_DEPTH: usize = 48;
+
+lazy_static! {
+    /// Cached padding roots: `ZERO_HASHES[i]` is the root of a Merkle tree with 2^i zero leaves.
+    ///
+    /// Entry `0` is a single all-zero chunk; every later entry is the hash of the previous entry
+    /// concatenated with itself.
+    static ref ZERO_HASHES: Vec<Vec<u8>> = {
+        let mut table: Vec<Vec<u8>> = Vec::with_capacity(MAX_TREE_DEPTH + 1);
+        table.push(vec![0; 32]);
+
+        for depth in 1..=MAX_TREE_DEPTH {
+            let parent = hash_concat(&table[depth - 1], &table[depth - 1]);
+            table.push(parent);
+        }
+
+        table
+    };
+}
+
+/// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of
+/// leaves.
+///
+/// First all leaf chunks are extracted from `bytes` (a short final chunk is zero-padded). The
+/// tree is then treated as if padding leaves had been added until the number of leaves is the
+/// next power of two that is greater than or equal to both the number of chunks in `bytes` and
+/// `min_leaves`. Callers may set `min_leaves` to `0` if no additional chunks should be added to
+/// the given `bytes`.
+///
+/// If `bytes.len() <= BYTES_PER_CHUNK` and `min_leaves <= 1`, no hashing is done and `bytes` is
+/// returned, padded out to `BYTES_PER_CHUNK` length with `0`.
+///
+/// ## Panics
+///
+/// Panics if a padding node is required at a height greater than `MAX_TREE_DEPTH` (see
+/// `get_zero_hash`).
+///
+/// ## CPU Performance
+///
+/// A cache of `MAX_TREE_DEPTH + 1` hashes is stored to avoid re-computing the hashes of padding
+/// nodes (or their parents). Therefore, adding padding nodes only incurs one more hash per
+/// additional height of the tree.
+///
+/// ## Memory Performance
+///
+/// This algorithm has three interesting memory usage properties:
+///
+/// 1. The maximum memory footprint is roughly `O(V / 2)` memory, where `V` is the number of leaf
+///    chunks with values (i.e., leaves that are not padding). This means adding padding nodes to
+///    the tree does not increase the memory footprint.
+/// 2. At each height of the tree the buffer is truncated to the number of parent nodes (roughly
+///    half) until only a single chunk is stored. Note that `Vec::truncate` shortens the length
+///    but does not release the allocation's capacity.
+/// 3. The input `bytes` are not copied into another list before processing.
+///
+/// _Note: there are some minor memory overheads, including a handful of usizes and a list of
+/// `MAX_TREE_DEPTH + 1` hashes as `lazy_static` constants._
+pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Vec<u8> {
+    // If the bytes are just one chunk or less (and no more than one leaf was requested), pad to
+    // one chunk and return without hashing.
+    if bytes.len() <= BYTES_PER_CHUNK && min_leaves <= 1 {
+        let mut o = bytes.to_vec();
+        o.resize(BYTES_PER_CHUNK, 0);
+        return o;
+    }
+
+    // Always true at this point because of the early return above; kept as an explicit statement
+    // of the invariant the rest of the function relies on.
+    assert!(
+        bytes.len() > BYTES_PER_CHUNK || min_leaves > 1,
+        "Merkle hashing only needs to happen if there is more than one chunk"
+    );
+
+    // The number of leaves that can be made directly from `bytes` (rounding a partial chunk up).
+    let leaves_with_values = (bytes.len() + (BYTES_PER_CHUNK - 1)) / BYTES_PER_CHUNK;
+
+    // The number of parents that have at least one non-padding leaf.
+    //
+    // Since there is more than one node in this tree (see prior assertion), there should always be
+    // one or more initial parent nodes. The `max(1, ..)` covers empty `bytes` with
+    // `min_leaves > 1`, where the single parent is the hash of two zero chunks.
+    let initial_parents_with_values = std::cmp::max(1, next_even_number(leaves_with_values) / 2);
+
+    // The number of leaves in the full tree (including padding nodes).
+    let num_leaves = std::cmp::max(leaves_with_values, min_leaves).next_power_of_two();
+
+    // The number of levels in the tree.
+    //
+    // A tree with a single node has `height == 1`.
+    let height = num_leaves.trailing_zeros() as usize + 1;
+
+    assert!(height >= 2, "The tree should have two or more heights");
+
+    // A buffer/scratch-space used for storing each round of hashes at each height.
+    //
+    // This buffer is kept as small as possible; it will shrink so it never stores a padding node.
+    let mut chunks = ChunkStore::with_capacity(initial_parents_with_values);
+
+    // Create a parent in the `chunks` buffer for every two chunks in `bytes`.
+    //
+    // I.e., do the first round of hashing, hashing from the `bytes` slice and filling the `chunks`
+    // struct.
+    for i in 0..initial_parents_with_values {
+        let start = i * BYTES_PER_CHUNK * 2;
+
+        // Hash two chunks, creating a parent chunk.
+        let hash = match bytes.get(start..start + BYTES_PER_CHUNK * 2) {
+            // All bytes are available, hash as usual.
+            Some(slice) => hash(slice),
+            // Unable to get all the bytes, get a smaller slice and zero-pad it out to two chunks.
+            None => {
+                let mut preimage = bytes
+                    .get(start..)
+                    .expect("`i` can only be larger than zero if there are bytes to read")
+                    .to_vec();
+                preimage.resize(BYTES_PER_CHUNK * 2, 0);
+                hash(&preimage)
+            }
+        };
+
+        assert_eq!(
+            hash.len(),
+            BYTES_PER_CHUNK,
+            "Hashes should be exactly one chunk"
+        );
+
+        // Store the parent node.
+        chunks
+            .set(i, &hash)
+            .expect("Buffer should always have capacity for parent nodes")
+    }
+
+    // Iterate through all heights above the leaf nodes and either (a) hash two children or, (b)
+    // hash a left child and a right padding node.
+    //
+    // Skip the 0'th height because the leaves have already been processed. Skip the highest
+    // height in the tree as it is the root and does not require hashing.
+    //
+    // The padding nodes for each height are cached via `lazy_static` to simulate non-adjacent
+    // padding nodes (i.e., avoid doing unnecessary hashing).
+    //
+    // Note: the loop variable shadows the outer `height`; inside the loop it is the height of the
+    // child nodes currently held in `chunks`.
+    for height in 1..height - 1 {
+        let child_nodes = chunks.len();
+        let parent_nodes = next_even_number(child_nodes) / 2;
+
+        // For each pair of nodes stored in `chunks`:
+        //
+        // - If two nodes are available, hash them to form a parent.
+        // - If one node is available, hash it and a cached padding node to form a parent.
+        //
+        // Parent `i` is written back into slot `i`, which is safe because slots `2i` and `2i + 1`
+        // are read before slot `i` is overwritten and `i <= 2i`.
+        for i in 0..parent_nodes {
+            let (left, right) = match (chunks.get(i * 2), chunks.get(i * 2 + 1)) {
+                (Ok(left), Ok(right)) => (left, right),
+                // The missing right sibling is an all-zero subtree of the children's height.
+                (Ok(left), Err(_)) => (left, get_zero_hash(height)),
+                // Deriving `parent_nodes` from `chunks.len()` has ensured that we never encounter the
+                // scenario where we expect two nodes but there are none.
+                (Err(_), Err(_)) => unreachable!("Parent must have one child"),
+                // `chunks` is a contiguous array so it is impossible for an index to be missing
+                // when a higher index is present.
+                (Err(_), Ok(_)) => unreachable!("Parent must have a left child"),
+            };
+
+            assert!(
+                left.len() == right.len() && right.len() == BYTES_PER_CHUNK,
+                "Both children should be `BYTES_PER_CHUNK` bytes."
+            );
+
+            let hash = hash_concat(left, right);
+
+            // Store a parent node.
+            chunks
+                .set(i, &hash)
+                .expect("Buf is adequate size for parent");
+        }
+
+        // Shrink the buffer so it neatly fits the number of new nodes created in this round.
+        //
+        // The number of `parent_nodes` is either decreasing or stable. It never increases.
+        chunks.truncate(parent_nodes);
+    }
+
+    // There should be a single chunk left in the buffer and it is the Merkle root.
+    let root = chunks.into_vec();
+
+    assert_eq!(root.len(), BYTES_PER_CHUNK, "Only one chunk should remain");
+
+    root
+}
+
+/// A flat byte buffer that is addressed in words of `BYTES_PER_CHUNK` bytes.
+#[derive(Debug)]
+struct ChunkStore(Vec<u8>);
+
+impl ChunkStore {
+    /// Builds a store that already holds `chunks` zero-filled chunks.
+    fn with_capacity(chunks: usize) -> Self {
+        let zeroed = vec![0; chunks * BYTES_PER_CHUNK];
+        ChunkStore(zeroed)
+    }
+
+    /// Overwrites chunk `i` with `value`.
+    ///
+    /// Returns `Err` if `i` is out-of-bounds or `value.len() != BYTES_PER_CHUNK`.
+    fn set(&mut self, i: usize, value: &[u8]) -> Result<(), ()> {
+        if i >= self.len() || value.len() != BYTES_PER_CHUNK {
+            return Err(());
+        }
+
+        let offset = i * BYTES_PER_CHUNK;
+        self.0[offset..offset + BYTES_PER_CHUNK].copy_from_slice(value);
+        Ok(())
+    }
+
+    /// Borrows chunk `i`.
+    ///
+    /// Returns `Err` if `i` is out-of-bounds.
+    fn get(&self, i: usize) -> Result<&[u8], ()> {
+        if i >= self.len() {
+            return Err(());
+        }
+
+        let offset = i * BYTES_PER_CHUNK;
+        Ok(&self.0[offset..offset + BYTES_PER_CHUNK])
+    }
+
+    /// The number of whole chunks currently held.
+    fn len(&self) -> usize {
+        let byte_len = self.0.len();
+        byte_len / BYTES_PER_CHUNK
+    }
+
+    /// Shortens the store to at most `num_chunks` chunks.
+    ///
+    /// Behaves like `Vec::truncate`, measured in chunks instead of bytes.
+    fn truncate(&mut self, num_chunks: usize) {
+        let byte_len = num_chunks * BYTES_PER_CHUNK;
+        self.0.truncate(byte_len)
+    }
+
+    /// Consumes the store and hands back the raw bytes.
+    fn into_vec(self) -> Vec<u8> {
+        let ChunkStore(raw) = self;
+        raw
+    }
+}
+
+/// Looks up the cached padding node for `height` in `ZERO_HASHES`.
+///
+/// Panics if `height` is greater than `MAX_TREE_DEPTH`.
+fn get_zero_hash(height: usize) -> &'static [u8] {
+    if height > MAX_TREE_DEPTH {
+        panic!("Tree exceeds MAX_TREE_DEPTH of {}", MAX_TREE_DEPTH);
+    }
+
+    &ZERO_HASHES[height]
+}
+
+/// Returns `vec1` with the bytes of `vec2` appended to it.
+fn concat(mut vec1: Vec<u8>, vec2: Vec<u8>) -> Vec<u8> {
+    vec1.extend_from_slice(&vec2);
+    vec1
+}
+
+/// Hashes the concatenation `h1 || h2`.
+fn hash_concat(h1: &[u8], h2: &[u8]) -> Vec<u8> {
+    let preimage = concat(h1.to_vec(), h2.to_vec());
+    hash(&preimage)
+}
+
+/// Rounds `n` up to the nearest even number; an even `n` is returned unchanged.
+fn next_even_number(n: usize) -> usize {
+    if n % 2 == 0 {
+        n
+    } else {
+        n + 1
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Root computed by the simple reference implementation (`merkleize_standard`).
+    pub fn reference_root(bytes: &[u8]) -> Vec<u8> {
+        crate::merkleize_standard(bytes)[0..32].to_vec()
+    }
+
+    /// Generates the shared test-suite, using `$get_bytes(len)` to build each `len`-byte input.
+    macro_rules! common_tests {
+        ($get_bytes: ident) => {
+            #[test]
+            fn zero_value_0_nodes() {
+                test_against_reference(&$get_bytes(0 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_1_nodes() {
+                test_against_reference(&$get_bytes(1 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_2_nodes() {
+                test_against_reference(&$get_bytes(2 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_3_nodes() {
+                test_against_reference(&$get_bytes(3 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_4_nodes() {
+                test_against_reference(&$get_bytes(4 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_8_nodes() {
+                test_against_reference(&$get_bytes(8 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_9_nodes() {
+                test_against_reference(&$get_bytes(9 * BYTES_PER_CHUNK), 0);
+            }
+
+            #[test]
+            fn zero_value_8_nodes_varying_min_length() {
+                for i in 0..64 {
+                    test_against_reference(&$get_bytes(8 * BYTES_PER_CHUNK), i);
+                }
+            }
+
+            #[test]
+            fn zero_value_range_of_nodes() {
+                for i in 0..32 * BYTES_PER_CHUNK {
+                    test_against_reference(&$get_bytes(i), 0);
+                }
+            }
+
+            #[test]
+            fn max_tree_depth_min_nodes() {
+                let input = vec![0; 10 * BYTES_PER_CHUNK];
+                let min_nodes = 2usize.pow(MAX_TREE_DEPTH as u32);
+                assert_eq!(
+                    merkleize_padded(&input, min_nodes),
+                    get_zero_hash(MAX_TREE_DEPTH)
+                );
+            }
+        };
+    }
+
+    mod zero_value {
+        use super::*;
+
+        /// Returns `bytes` zero bytes.
+        fn zero_bytes(bytes: usize) -> Vec<u8> {
+            vec![0; bytes]
+        }
+
+        common_tests!(zero_bytes);
+    }
+
+    mod random_value {
+        use super::*;
+        use rand::RngCore;
+
+        /// Returns `num_bytes` random bytes.
+        fn random_bytes(num_bytes: usize) -> Vec<u8> {
+            // The buffer must have `num_bytes` *length*, not merely capacity: `fill_bytes` only
+            // writes into the existing elements of the slice, so a `Vec::with_capacity` buffer
+            // (length zero) would stay empty and every test would run on empty input.
+            let mut bytes = vec![0; num_bytes];
+            rand::thread_rng().fill_bytes(&mut bytes);
+            bytes
+        }
+
+        common_tests!(random_bytes);
+    }
+
+    /// Asserts that `merkleize_padded(input, min_nodes)` matches the reference root of `input`
+    /// zero-padded out to `min_nodes.next_power_of_two()` chunks.
+    fn test_against_reference(input: &[u8], min_nodes: usize) {
+        let mut reference_input = input.to_vec();
+        reference_input.resize(
+            std::cmp::max(
+                reference_input.len(),
+                min_nodes.next_power_of_two() * BYTES_PER_CHUNK,
+            ),
+            0,
+        );
+
+        assert_eq!(
+            reference_root(&reference_input),
+            merkleize_padded(input, min_nodes),
+            "input.len(): {:?}",
+            input.len()
+        );
+    }
+}
--- a/eth2/utils/tree_hash/src/merkleize_standard.rs
+++ b/eth2/utils/tree_hash/src/merkleize_standard.rs
@ -1,12 +1,23 @@
 use super::*;
 use hashing::hash;

-pub fn merkle_root(bytes: &[u8]) -> Vec<u8> {
-    // TODO: replace this with a more memory efficient method.
-    efficient_merkleize(&bytes)[0..32].to_vec()
-}
-
-pub fn efficient_merkleize(bytes: &[u8]) -> Vec<u8> {
+/// Merkleizes bytes and returns the root, using a simple algorithm that does not optimize to avoid
+/// processing or storing padding bytes.
+///
+/// The input `bytes` will be padded to ensure that the number of leaves is a power-of-two.
+///
+/// It is likely a better choice to use [merkleize_padded](fn.merkleize_padded.html) instead.
+///
+/// ## CPU Performance
+///
+/// Will hash all nodes in the tree, even if they are padding and pre-determined.
+///
+/// ## Memory Performance
+///
+///  - Duplicates the input `bytes`.
+///  - Stores all internal nodes, even if they are padding.
+///  - Does not free up unused memory during operation.
+pub fn merkleize_standard(bytes: &[u8]) -> Vec<u8> {
    // If the bytes are just one chunk (or less than one chunk) just return them.
    if bytes.len() <= HASHSIZE {
        let mut o = bytes.to_vec();
--- a/eth2/utils/tree_hash_derive/src/lib.rs
+++ b/eth2/utils/tree_hash_derive/src/lib.rs
@ -150,7 +150,7 @@ pub fn tree_hash_derive(input: TokenStream) -> TokenStream {
                    leaves.append(&mut self.#idents.tree_hash_root());
                )*

-                tree_hash::merkleize::merkle_root(&leaves)
+                tree_hash::merkle_root(&leaves)
            }
        }
    };
@ -180,7 +180,7 @@ pub fn tree_hash_signed_root_derive(input: TokenStream) -> TokenStream {
                    leaves.append(&mut self.#idents.tree_hash_root());
                )*

-                tree_hash::merkleize::merkle_root(&leaves)
+                tree_hash::merkle_root(&leaves)
            }
        }
    };
--- a/eth2/utils/tree_hash_derive/tests/tests.rs
+++ b/eth2/utils/tree_hash_derive/tests/tests.rs
@ -1,5 +1,5 @@
 use cached_tree_hash::{CachedTreeHash, TreeHashCache};
-use tree_hash::{merkleize::merkle_root, SignedRoot, TreeHash};
+use tree_hash::{merkle_root, SignedRoot, TreeHash};
 use tree_hash_derive::{CachedTreeHash, SignedRoot, TreeHash};

 #[derive(Clone, Debug, TreeHash, CachedTreeHash)]