WIP for tree_hash

2018-12-04 12:37:12 -08:00 · 2018-12-04 12:37:12 -08:00 · 56b1639f10
commit 56b1639f10
parent c1c37098d7
7 changed files with 305 additions and 14 deletions
--- a/beacon_chain/types/src/shard_and_committee.rs
+++ b/beacon_chain/types/src/shard_and_committee.rs
@ -1,3 +1,5 @@
+use super::ssz::{merkle_hash, TreeHash};
+
 #[derive(Clone, Debug, PartialEq)]
 pub struct ShardAndCommittee {
    pub shard: u16,
@ -15,6 +17,26 @@ impl ShardAndCommittee {
    }
 }

+impl TreeHash for ShardAndCommittee {
+    // Hashes the shard id followed by the merkle hash of the committee. Python sample code:
+    //    def hash_shard_and_committee(val):
+    //        committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee])
+    //        return hash(val.shard_id.to_bytes(2, 'big') + committee)
+    fn tree_hash(&self) -> Vec<u8> {
+        let mut committee_ssz_items = Vec::with_capacity(self.committee.len());
+        for c in &self.committee {
+            // Build the 3 big-endian bytes (low 24 bits) directly: truncating the 4-byte
+            // encoding of a u32 with `resize(3, 0)` does not yield `to_bytes(3, 'big')`.
+            let v = *c as u32;
+            committee_ssz_items.push(vec![(v >> 16) as u8, (v >> 8) as u8, v as u8]);
+        }
+        let mut result = self.shard.tree_hash();
+        result.append(&mut merkle_hash(&mut committee_ssz_items));
+
+        result.tree_hash()
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -25,4 +47,15 @@ mod tests {
        assert_eq!(s.shard, 0);
        assert_eq!(s.committee.len(), 0);
    }
+
+    #[test]
+    fn test_shard_and_committee_tree_hash() { // smoke test: only the digest length is checked
+        let s = ShardAndCommittee {
+            shard: 1,
+            committee: vec![1, 2, 3],
+        };
+
+        // TODO: assert against a known-good hash value instead of just the length
+        assert_eq!(s.tree_hash().len(), 32);
+    }
 }
--- a/beacon_chain/types/src/validator_record.rs
+++ b/beacon_chain/types/src/validator_record.rs
@ -1,4 +1,5 @@
 use super::bls::{Keypair, PublicKey};
+use super::ssz::TreeHash;
 use super::{Address, Hash256};

 #[derive(Debug, PartialEq, Clone, Copy)]
@ -44,6 +45,46 @@ impl ValidatorRecord {
    }
 }

+impl TreeHash for ValidatorRecord {
+    /* python sample code:
+        def hash_validator_record(val):
+            return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \
+                val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \
+                val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big'))
+    */
+    fn tree_hash(&self) -> Vec<u8> {
+        // Per the python sample the serialized fields add up to 118 bytes
+        // (32 + 2 + 20 + 32 + 16 + 8 + 8), so the buffer is allocated once here.
+        let mut ssz = Vec::with_capacity(118);
+
+        // "val.pubkey.to_bytes(32, 'big')" logic
+        // TODO:
+        // probably all kinds of wrong here. Not sure how to convert the (ssz)
+        // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on
+        // PublicKey, returns a 192 byte array.
+        let pub_key_bytes = &mut self.pubkey.as_bytes();
+        pub_key_bytes.resize(32, 0); // keeps only the first 32 bytes when the input is longer
+        ssz.append(pub_key_bytes);
+
+        ssz.append(&mut self.withdrawal_shard.tree_hash());
+        ssz.append(&mut self.withdrawal_address.tree_hash());
+        ssz.append(&mut self.randao_commitment.tree_hash());
+
+        // balance is a 64bit number that serializes to 8 bytes.
+        // It is right-padded with zeros here to 16 bytes - not sure why
+        // a 16 byte array is used in the python code: "val.balance.to_bytes(16, 'big')"
+        let mut balance = self.balance.tree_hash();
+        balance.resize(16, 0); // NOTE(review): to_bytes(16, 'big') pads on the left - confirm
+        ssz.append(&mut balance);
+
+        // TODO:
+        // ... + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')
+        // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields
+
+        ssz.tree_hash() // tree hash of the concatenated field bytes
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -59,4 +100,13 @@ mod tests {
        assert_eq!(v.status, 0);
        assert_eq!(v.exit_slot, 0);
    }
+
+    #[test]
+    fn test_validator_record_ree_hash() { // NOTE(review): name looks like a typo for "..._tree_hash"
+        let (v, _kp) = ValidatorRecord::zero_with_thread_rand_keypair();
+        let h = v.tree_hash();
+
+        // TODO: check against a known hash value; for now only the length is asserted
+        assert_eq!(h.len(), 32);
+    }
 }
--- a/beacon_chain/utils/ssz/Cargo.toml
+++ b/beacon_chain/utils/ssz/Cargo.toml
@ -6,3 +6,4 @@ authors = ["Paul Hauner <paul@paulhauner.com>"]
 [dependencies]
 bytes = "0.4.9"
 ethereum-types = "0.4.0"
+blake2-rfc = "0.2.18"
--- a/beacon_chain/utils/ssz/src/impl_encode.rs
+++ b/beacon_chain/utils/ssz/src/impl_encode.rs
@ -1,7 +1,7 @@
 extern crate bytes;

 use self::bytes::{BufMut, BytesMut};
-use super::ethereum_types::H256;
+use super::ethereum_types::{Address, H256};
 use super::{Encodable, SszStream};

 /*
@ -48,10 +48,18 @@ impl_encodable_for_uint!(usize, 64);

 impl Encodable for H256 {
    fn ssz_append(&self, s: &mut SszStream) {
+        assert_eq!(32, self.len()); // panics unless the hash is exactly 32 bytes long
        s.append_encoded_raw(&self.to_vec()); // copies the bytes and appends them to the stream
    }
 }

+impl Encodable for Address { // SSZ encoding of an Address: its bytes appended to the stream
+    fn ssz_append(&self, s: &mut SszStream) {
+        assert_eq!(20, self.len()); // panics unless the address is exactly 20 bytes long
+        s.append_encoded_raw(&self) // appends the address bytes via append_encoded_raw
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -64,6 +72,14 @@ mod tests {
        assert_eq!(ssz.drain(), vec![0; 32]);
    }

+    #[test]
+    fn test_ssz_encode_adress() { // NOTE(review): "adress" looks like a typo for "address"
+        let h = Address::zero();
+        let mut ssz = SszStream::new();
+        ssz.append(&h);
+        assert_eq!(ssz.drain(), vec![0; 20]); // the zero address encodes to exactly 20 zero bytes
+    }
+
    #[test]
    fn test_ssz_encode_u8() {
        let x: u8 = 0;
--- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs
+++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs
@ -0,0 +1,67 @@
+extern crate blake2_rfc;
+
+use self::blake2_rfc::blake2b::blake2b;
+
+use super::ethereum_types::{Address, H256};
+use super::{ssz_encode, TreeHash};
+
+// I haven't added tests for the tree_hash implementations that simply pass
+// through to the ssz_encode lib, for which tests already exist. Do we want
+// to test them anyway?
+
+impl TreeHash for u8 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self) // the tree hash of a u8 is its SSZ encoding; nothing is hashed here
+    }
+}
+
+impl TreeHash for u16 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self) // the tree hash of a u16 is its SSZ encoding; nothing is hashed here
+    }
+}
+
+impl TreeHash for u32 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self) // the tree hash of a u32 is its SSZ encoding; nothing is hashed here
+    }
+}
+
+impl TreeHash for u64 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self) // the tree hash of a u64 is its SSZ encoding; nothing is hashed here
+    }
+}
+
+impl TreeHash for Address {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self) // the tree hash of an Address is its SSZ encoding; nothing is hashed here
+    }
+}
+
+impl TreeHash for H256 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self) // the tree hash of an H256 is its SSZ encoding; nothing is hashed here
+    }
+}
+
+// Byte slices: the tree hash is `hash` applied to the raw bytes.
+impl TreeHash for [u8] {
+    fn tree_hash(&self) -> Vec<u8> {
+        hash(&self) // delegates to the private `hash` helper in this module
+    }
+}
+
+/**
+ * From the Spec:
+ *      We define hash(x) as BLAKE2b-512(x)[0:32]
+ * From the python sample code:
+ *      return blake2b(x).digest()[:32]
+ *
+ * Returns the first 32 bytes of the 64-byte BLAKE2b-512 digest; a 32-byte blake2b digest
+ * would differ (output length is a BLAKE2 parameter). TODO: reuse hashing lib 'canonical_hash'?
+ */
+fn hash(data: &[u8]) -> Vec<u8> {
+    let result = blake2b(64, &[], data);
+    result.as_bytes()[..32].to_vec()
+}
--- a/beacon_chain/utils/ssz/src/lib.rs
+++ b/beacon_chain/utils/ssz/src/lib.rs
@ -12,12 +12,15 @@ extern crate ethereum_types;

 pub mod decode;
 pub mod encode;
+pub mod tree_hash;

 mod impl_decode;
 mod impl_encode;
+mod impl_tree_hash;

 pub use decode::{decode_ssz, decode_ssz_list, Decodable, DecodeError};
 pub use encode::{Encodable, SszStream};
+pub use tree_hash::{merkle_hash, TreeHash};

 pub const LENGTH_BYTES: usize = 4;
 pub const MAX_LIST_SIZE: usize = 1 << (4 * 8);
--- a/beacon_chain/utils/ssz/src/tree_hash.rs
+++ b/beacon_chain/utils/ssz/src/tree_hash.rs
@ -1,6 +1,123 @@
-extern crate blake2_rfc;
+const CHUNKSIZE: usize = 128; // bytes per data chunk; merkle_hash requires the flattened data to be a multiple of this
+const HASHSIZE: usize = 32; // bytes per hash; also the chunk size merkle_hash uses for levels above the data

-use self::blake2_rfc::blake2s::blake2s;
+pub trait TreeHash { // implemented by every type that can produce tree-hash bytes
+    // Note: it would be nice to have a default trait implementation here
+    // i.e. ssz_encode(self) - but rust complains it does not know
+    // the size of 'self'.  Not sure if there's a way around this.
+
+    fn tree_hash(&self) -> Vec<u8>; // returns the tree-hash bytes for `self`
+}
+
+// Python reference implementation (below). Note: I'm seeing some
+// inconsistencies between it and the 'Tree Hash' section of the SSZ spec,
+// so I imagine it will change.
+/* def merkle_hash(lst):
+    # Concatenate list into data
+    if len(lst[0]) != next_power_of_2(len(lst[0])):
+        lst = [extend_to_power_of_2(x) for x in lst]
+    data = b''.join(lst)
+    # Add padding
+    data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE))
+    assert len(data) % CHUNKSIZE == 0
+    # Store length (to compensate for non-bijectiveness of padding)
+    datalen = len(lst).to_bytes(32, 'big')
+    # Convert to chunks
+    chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)]
+    chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE]
+    for i in range(len(chunkz)//2 - 1, 0, -1):
+        chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1])
+    return hash(chunkz[1] + datalen) */
+
+/**
+ * Returns a 32 byte hash of 'list', a vector of byte vectors.
+ * Note that this drains 'list': every element is left empty. Panics on bad chunking.
+ * */
+pub fn merkle_hash(list: &mut Vec<Vec<u8>>) -> Vec<u8> {
+    // flatten list
+    let data = &mut list_to_blob(list);
+
+    // data should be divisible by CHUNKSIZE
+    assert_eq!(data.len() % CHUNKSIZE, 0);
+
+    // number of list items (python: len(lst)), not the padded byte length; hashed with the root
+    let dlen = list.len() as u64;
+    let data_len_bytes = &mut dlen.tree_hash();
+    data_len_bytes.resize(32, 0); // NOTE(review): to_bytes(32, 'big') pads on the left - confirm
+
+    // merklize
+    //
+    // From the Spec:
+    // while len(chunkz) > 1:
+    //    if len(chunkz) % 2 == 1:
+    //        chunkz.append(b'\x00' * SSZ_CHUNK_SIZE)
+    //    chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)]
+    let mut mhash = hash_level(data, CHUNKSIZE);
+    while mhash.len() > HASHSIZE {
+        mhash = hash_level(&mut mhash, HASHSIZE);
+    }
+
+    assert_eq!(mhash.len(), HASHSIZE);
+
+    mhash.append(data_len_bytes);
+    mhash.tree_hash()
+}
+
+/**
+ * Takes a flat vector of bytes and hashes each pair of adjacent chunks (chunk_size * 2
+ * bytes) into one hash, returning the hashes concatenated (divisible by the 32 byte hashsize)
+ */
+fn hash_level(data: &mut Vec<u8>, chunk_size: usize) -> Vec<u8> {
+    assert!(data.len() % chunk_size == 0); // panics unless data is a whole number of chunks
+
+    let mut result: Vec<u8> = Vec::new();
+    for two_chunks in data.chunks(chunk_size * 2) {
+        if two_chunks.len() == chunk_size && data.len() > chunk_size {
+            // a trailing unpaired chunk is hashed together with CHUNKSIZE zero bytes
+            // (CHUNKSIZE even when chunk_size is smaller); a lone single chunk goes to `else`
+            let mut c = two_chunks.to_vec();
+            c.append(&mut vec![0; CHUNKSIZE]);
+            result.append(&mut c.tree_hash());
+        } else {
+            result.append(&mut two_chunks.tree_hash()); // full pair, or data that is one single chunk
+        }
+    }
+
+    result
+}
+
+// Flattens 'list' into a single byte vector, zero-padded on the right to a multiple
+// of CHUNKSIZE. If the first element's length is not a power of two, every element is
+// first extended to a power-of-two length. The elements of 'list' are left empty.
+fn list_to_blob(list: &mut Vec<Vec<u8>>) -> Vec<u8> {
+    // an empty list has no first element to inspect; it flattens to an empty blob
+    let needs_extending = list.first().map_or(false, |x| !x.len().is_power_of_two());
+    if needs_extending {
+        for x in list.iter_mut() {
+            extend_to_power_of_2(x);
+        }
+    }
+
+    // Sum the lengths unconditionally so elements that are already power-of-two sized
+    // are counted too; otherwise the blob would not get padded to CHUNKSIZE.
+    let data_len: usize = list.iter().map(|x| x.len()).sum();
+
+    // do we need padding?
+    let extend_by = if data_len % CHUNKSIZE > 0 {
+        CHUNKSIZE - (data_len % CHUNKSIZE)
+    } else {
+        0
+    };
+
+    // allocate buffer and append each list element (flatten the vec of vecs)
+    let mut data: Vec<u8> = Vec::with_capacity(data_len + extend_by);
+    for x in list.iter_mut() {
+        data.append(x);
+    }
+
+    data.resize(data_len + extend_by, 0); // add padding
+    data
+}

 /**
 * Extends data length to a power of 2 by minimally right-zero-padding
@ -9,16 +126,10 @@ fn extend_to_power_of_2(data: &mut Vec<u8>) {
    let len = data.len();
    let new_len = len.next_power_of_two();
    if new_len > len {
-        data.append(&mut vec![0; new_len - len]);
+        data.resize(new_len, 0);
    }
 }

-fn hash(data: Vec<u8>) -> Vec<u8> {
-    let result = blake2s(32, &[], data.as_slice());
-    result.as_bytes().to_vec()
-}
-
-// fn list_to_glob() {}
 #[cfg(test)]
 mod tests {
    use super::*;
@ -26,11 +137,21 @@ mod tests {
    #[test]
    fn test_extend_to_power_of_2() {
        let mut data = vec![1, 2, 3, 4, 5];
-
-        // an array length of 5 should be extended to
-        // a length of 8 (the next power of 2) by right
-        // padding it with 3 zeros
        extend_to_power_of_2(&mut data);
        assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]);
    }
+
+    #[test]
+    fn test_merkle_hash() { // smoke test: three 100-byte items, only the result length is checked
+        let data1 = vec![1; 100];
+        let data2 = vec![2; 100];
+        let data3 = vec![3; 100];
+        let mut list = vec![data1, data2, data3];
+        let result = merkle_hash(&mut list);
+
+        // note: should test against a known hash value
+        assert_eq!(HASHSIZE, result.len());
+        println!("merkle_hash: {:?}", result);
+    }
+
 }