Added tree_hash impl for Vec and HashMap (list and container); plus various code cleanup
parent 56b1639f10
commit bfcce4fe47
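For context, a minimal, self-contained sketch of the scheme the new impls follow: hash(x) is BLAKE2b-512(x)[0:32], and a list hashes the concatenation of its items' tree hashes. This is an illustration only, assuming the blake2_rfc crate; the trait and Vec impl mirror the diff below, but the u32 impl is a stand-in rather than the crate's ssz_encode-based one.

// Illustrative sketch, not part of this commit.
extern crate blake2_rfc;

use blake2_rfc::blake2b::blake2b;

pub trait TreeHash {
    fn tree_hash(&self) -> Vec<u8>;
}

// hash(x) = BLAKE2b-512(x)[0:32], i.e. a 32-byte BLAKE2b digest
fn hash(data: &[u8]) -> Vec<u8> {
    blake2b(32, &[], data).as_bytes().to_vec()
}

// Stand-in leaf impl for the sketch; the crate hashes the ssz encoding instead.
impl TreeHash for u32 {
    fn tree_hash(&self) -> Vec<u8> {
        self.to_be_bytes().to_vec()
    }
}

// Mirrors the new Vec impl: concatenate each item's tree_hash, then hash once.
impl<T: TreeHash> TreeHash for Vec<T> {
    fn tree_hash(&self) -> Vec<u8> {
        let mut bytes = Vec::new();
        for item in self {
            bytes.append(&mut item.tree_hash());
        }
        hash(&bytes)
    }
}

fn main() {
    let root = vec![1u32, 2, 3, 4, 5, 6, 7].tree_hash();
    assert_eq!(root.len(), 32); // always a 32-byte digest, as in the new test
}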
@@ -18,10 +18,6 @@ impl ShardAndCommittee {
}

impl TreeHash for ShardAndCommittee {
    // python sample code:
    // def hash_shard_and_committee(val):
    //     committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee])
    //     return hash(val.shard_id.to_bytes(2, 'big') + committee)
    fn tree_hash(&self) -> Vec<u8> {
        let mut committee_ssz_items = Vec::new();
        for c in &self.committee {
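The python sample above packs each committee index as 3 big-endian bytes and the shard_id as 2 big-endian bytes before hashing. A small standalone sketch of that fixed-width packing (the helper name is illustrative, not from this commit):

// Take the low-order n bytes of a value's big-endian representation,
// matching python's x.to_bytes(n, 'big') for values that fit in n bytes.
fn to_be_bytes_n(value: u64, n: usize) -> Vec<u8> {
    let bytes = value.to_be_bytes(); // 8 bytes, big-endian
    bytes[bytes.len() - n..].to_vec()
}

fn main() {
    assert_eq!(to_be_bytes_n(5, 3), vec![0, 0, 5]); // committee index: to_bytes(3, 'big')
    assert_eq!(to_be_bytes_n(513, 2), vec![2, 1]);  // shard_id: to_bytes(2, 'big')
}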
@@ -2,6 +2,16 @@ use super::bls::{Keypair, PublicKey};
use super::ssz::TreeHash;
use super::{Address, Hash256};

pub const HASH_SSZ_VALIDATOR_RECORD_LENGTH: usize = {
    32 + // pubkey.to_bytes(32, 'big')
    2 + // withdrawal_shard.to_bytes(2, 'big')
    20 + // withdrawal_address
    32 + // randao_commitment
    16 + // balance.to_bytes(16, 'big')
    8 + // start_dynasty.to_bytes(8, 'big')
    8 // end_dynasty.to_bytes(8, 'big')
};

#[derive(Debug, PartialEq, Clone, Copy)]
pub enum ValidatorStatus {
    PendingActivation = 0,
@@ -46,22 +56,15 @@ impl ValidatorRecord {
    }

impl TreeHash for ValidatorRecord {
    /* python sample code:
        def hash_validator_record(val):
            return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \
                val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \
                val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big'))
    */
    fn tree_hash(&self) -> Vec<u8> {
        // the serialized fields, to be hashed, should add up to 118 bytes in length.
        // allocating it once here
        let mut ssz = Vec::with_capacity(118);
        let mut ssz = Vec::with_capacity(HASH_SSZ_VALIDATOR_RECORD_LENGTH);

        // "val.pubkey.to_bytes(32, 'big')" logic
        // From python sample: "val.pubkey.to_bytes(32, 'big')"
        // TODO:
        // probably all kinds of wrong here. Not sure how to convert (szz)
        // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on
        // PublicKey, returns a 192 byte array.
        // Need to actually convert (szz) pubkey into a big-endian 32 byte
        // array.
        // Also, our ValidatorRecord seems to be missing the start_dynasty
        // and end_dynasty fields
        let pub_key_bytes = &mut self.pubkey.as_bytes();
        pub_key_bytes.resize(32, 0);
        ssz.append(pub_key_bytes);
@@ -70,17 +73,10 @@ impl TreeHash for ValidatorRecord {
        ssz.append(&mut self.withdrawal_address.tree_hash());
        ssz.append(&mut self.randao_commitment.tree_hash());

        // balance is a 64bit number that serializes to 8 bytes.
        // Right padding here to resize to 16 bytes - not sure why
        // a 16 byte array is implemented in the python code: "val.balance.to_bytes(16, 'big')"
        let mut balance = self.balance.tree_hash();
        balance.resize(16, 0);
        ssz.append(&mut balance);

        // TODO:
        // ... + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')
        // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields

        ssz.tree_hash()
    }
}
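A quick arithmetic check of the field widths in the python sample above (pubkey 32, withdrawal_shard 2, withdrawal_address 20, randao_commitment 32, balance 16, start_dynasty 8, end_dynasty 8), which is where the 118-byte figure in the older comment comes from. Illustration only, not part of the commit:

// Illustrative check, not part of this commit.
fn main() {
    let widths = [32usize, 2, 20, 32, 16, 8, 8];
    let total: usize = widths.iter().sum();
    assert_eq!(total, 118); // the pre-allocation size used by tree_hash above
    println!("hash_validator_record input: {} bytes", total);
}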
@@ -48,14 +48,12 @@ impl_encodable_for_uint!(usize, 64);

impl Encodable for H256 {
    fn ssz_append(&self, s: &mut SszStream) {
        assert_eq!(32, self.len());
        s.append_encoded_raw(&self.to_vec());
    }
}

impl Encodable for Address {
    fn ssz_append(&self, s: &mut SszStream) {
        assert_eq!(20, self.len());
        s.append_encoded_raw(&self)
    }
}
@@ -73,7 +71,7 @@ mod tests {
    }

    #[test]
    fn test_ssz_encode_adress() {
    fn test_ssz_encode_address() {
        let h = Address::zero();
        let mut ssz = SszStream::new();
        ssz.append(&h);
@@ -1,13 +1,11 @@
extern crate blake2_rfc;

use self::blake2_rfc::blake2b::blake2b;

use super::ethereum_types::{Address, H256};
use super::{ssz_encode, TreeHash};

// I haven't added tests for tree_hash implementations that simply pass
// through to the ssz_encode lib, for which tests already exist. Do we want
// tests anyway?
use std::cmp::Ord;
use std::collections::HashMap;
use std::hash::Hash;

impl TreeHash for u8 {
    fn tree_hash(&self) -> Vec<u8> {
@@ -45,23 +43,87 @@ impl TreeHash for H256 {
    }
}

// hash byte arrays
impl TreeHash for [u8] {
    fn tree_hash(&self) -> Vec<u8> {
        hash(&self)
    }
}

/**
 * From the Spec:
 * We define hash(x) as BLAKE2b-512(x)[0:32]
 * From the python sample code:
 * return blake2b(x).digest()[:32]
 *
 * This was originally written for blake2s before it was changed to blake2b
 * Perhaps, we should be using 'canonical_hash' in the hashing lib?
 */
impl<T> TreeHash for Vec<T>
where
    T: TreeHash,
{
    /// Appends the tree_hash for each value of 'self' into a byte array
    /// and returns the hash of said byte array
    fn tree_hash(&self) -> Vec<u8> {
        let mut result = Vec::new();
        for x in self {
            result.append(&mut x.tree_hash());
        }

        hash(&result)
    }
}

impl<K, V> TreeHash for HashMap<K, V>
where
    K: Eq,
    K: Hash,
    K: Ord,
    V: TreeHash,
{
    /// Appends the tree_hash for each value of 'self', sorted by key,
    /// into a byte array and returns the hash of said byte array
    fn tree_hash(&self) -> Vec<u8> {
        let mut items: Vec<_> = self.iter().collect();
        items.sort_by(|a, b| a.0.cmp(b.0));
        let mut result = Vec::new();
        for item in items {
            result.append(&mut item.1.tree_hash());
        }

        hash(&result)
    }
}

/// From the Spec:
/// We define hash(x) as BLAKE2b-512(x)[0:32]
fn hash(data: &[u8]) -> Vec<u8> {
    let result = blake2b(32, &[], &data);
    result.as_bytes().to_vec()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_impl_tree_hash_vec() {
        let result = vec![1u32, 2, 3, 4, 5, 6, 7].tree_hash();
        assert_eq!(result.len(), 32);
    }

    #[test]
    fn test_impl_tree_hash_hashmap() {
        let mut map = HashMap::new();
        map.insert("c", 3);
        map.insert("b", 2);
        map.insert("g", 7);
        map.insert("d", 6);
        map.insert("e", 4);
        map.insert("a", 1u32);
        map.insert("f", 5);
        let result = map.tree_hash();

        // TODO: resolve inconsistencies between the python sample code and
        // the spec; and create tests that tie out to an official result
        assert_eq!(
            result,
            [
                59, 110, 242, 24, 177, 184, 73, 109, 190, 19, 172, 39, 74, 94, 224, 198, 0, 170,
                225, 152, 249, 59, 10, 76, 137, 124, 52, 159, 37, 42, 26, 157
            ]
        );
    }

}
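A note on why the HashMap impl above sorts by key before hashing: HashMap iteration order is unspecified, so two maps with identical entries could otherwise feed the hash different byte streams. A small standalone sketch of that ordering step (the helper is illustrative, not from the crate):

use std::collections::HashMap;

// Collect the values in key order, mirroring the sort_by call in the new impl.
fn values_in_key_order(map: &HashMap<&str, u32>) -> Vec<u32> {
    let mut items: Vec<_> = map.iter().collect();
    items.sort_by(|a, b| a.0.cmp(b.0));
    items.into_iter().map(|(_, v)| *v).collect()
}

fn main() {
    let mut first = HashMap::new();
    first.insert("a", 1u32);
    first.insert("b", 2);
    first.insert("c", 3);

    let mut second = HashMap::new();
    second.insert("c", 3u32);
    second.insert("a", 1);
    second.insert("b", 2);

    // Same entries, different insertion order, same ordered value stream.
    assert_eq!(values_in_key_order(&first), values_in_key_order(&second));
}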
@@ -2,71 +2,32 @@ const CHUNKSIZE: usize = 128;
const HASHSIZE: usize = 32;

pub trait TreeHash {
    // Note: it would be nice to have a default trait implementation here
    // i.e. ssz_encode(self) - but rust complains it does not know
    // the size of 'self'. Not sure if there's a way around this.

    fn tree_hash(&self) -> Vec<u8>;
}

// python example: Note - I'm seeing some inconsistencies
// between this and the 'Tree Hash' section in the SSZ spec.
// So, I imagine it will change.
/* def merkle_hash(lst):
    # Concatenate list into data
    if len(lst[0]) != next_power_of_2(len(lst[0])):
        lst = [extend_to_power_of_2(x) for x in lst]
    data = b''.join(lst)
    # Add padding
    data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE))
    assert len(data) % CHUNKSIZE == 0
    # Store length (to compensate for non-bijectiveness of padding)
    datalen = len(lst).to_bytes(32, 'big')
    # Convert to chunks
    chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)]
    chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE]
    for i in range(len(chunkz)//2 - 1, 0, -1):
        chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1])
    return hash(chunkz[1] + datalen) */

/**
 * Returns a 32 byte hash of 'list', a vector of byte vectors.
 * Note that this will consume 'list'.
 * */
/// Returns a 32 byte hash of 'list' - a vector of byte vectors.
/// Note that this will consume 'list'.
pub fn merkle_hash(list: &mut Vec<Vec<u8>>) -> Vec<u8> {
    // flatten list
    let data = &mut list_to_blob(list);

    // data should be divisible by CHUNKSIZE
    assert_eq!(data.len() % CHUNKSIZE, 0);

    // get data_len as bytes. It will be hashed with the merkle root
    let dlen = data.len() as u64;
    let data_len_bytes = &mut dlen.tree_hash();
    data_len_bytes.resize(32, 0);

    // merklize
    //
    // From the Spec:
    // while len(chunkz) > 1:
    //     if len(chunkz) % 2 == 1:
    //         chunkz.append(b'\x00' * SSZ_CHUNK_SIZE)
    //     chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)]
    let mut mhash = hash_level(data, CHUNKSIZE);
    while mhash.len() > HASHSIZE {
        mhash = hash_level(&mut mhash, HASHSIZE);
    }

    assert_eq!(mhash.len(), HASHSIZE);

    mhash.append(data_len_bytes);
    mhash.tree_hash()
}

/**
 * Takes a flat vector of bytes. It then hashes (chunk_size * 2) into
 * a byte vector of hashes, divisible by the 32 byte hashsize
 */
/// Takes a flat vector of bytes. It then hashes 'chunk_size * 2' slices into
/// a byte vector of hashes, divisible by HASHSIZE
fn hash_level(data: &mut Vec<u8>, chunk_size: usize) -> Vec<u8> {
    assert!(data.len() % chunk_size == 0);

@@ -119,9 +80,7 @@ fn list_to_blob(list: &mut Vec<Vec<u8>>) -> Vec<u8> {
    data
}

/**
 * Extends data length to a power of 2 by minimally right-zero-padding
 */
/// Extends data length to a power of 2 by minimally right-zero-padding
fn extend_to_power_of_2(data: &mut Vec<u8>) {
    let len = data.len();
    let new_len = len.next_power_of_two();
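For reference, the padding rule in the python sample above, data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)), right-pads with zeros up to the next multiple of CHUNKSIZE and adds nothing when the length is already aligned. A standalone sketch of the same arithmetic (the helper name is illustrative, not from the crate):

// Illustrative sketch, not part of this commit.
const CHUNKSIZE: usize = 128;

// Right-zero-pad to the next multiple of CHUNKSIZE; no-op when already aligned.
fn pad_to_chunksize(data: &mut Vec<u8>) {
    let rem = data.len() % CHUNKSIZE;
    if rem != 0 {
        data.resize(data.len() + (CHUNKSIZE - rem), 0);
    }
}

fn main() {
    let mut short = vec![1u8; 100];
    pad_to_chunksize(&mut short);
    assert_eq!(short.len(), 128);

    let mut aligned = vec![1u8; 256];
    pad_to_chunksize(&mut aligned);
    assert_eq!(aligned.len(), 256); // already a multiple: nothing added
}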