From f11c619ef5810bd1061094a0706455e774b61dca Mon Sep 17 00:00:00 2001
From: mjkeating
Date: Tue, 4 Dec 2018 12:37:12 -0800
Subject: [PATCH] WIP for tree_hash

---
 beacon_chain/types/src/shard_and_committee.rs |  33 ++++
 beacon_chain/types/src/validator_record.rs    |  50 ++++++
 beacon_chain/utils/ssz/Cargo.toml             |   1 +
 beacon_chain/utils/ssz/src/impl_encode.rs     |  18 ++-
 beacon_chain/utils/ssz/src/impl_tree_hash.rs  |  67 ++++++++
 beacon_chain/utils/ssz/src/lib.rs             |   3 +
 beacon_chain/utils/ssz/src/tree_hash.rs       | 147 ++++++++++++++++--
 7 files changed, 305 insertions(+), 14 deletions(-)
 create mode 100644 beacon_chain/utils/ssz/src/impl_tree_hash.rs

diff --git a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs
index 44c2e57ff..8388b9cbc 100644
--- a/beacon_chain/types/src/shard_and_committee.rs
+++ b/beacon_chain/types/src/shard_and_committee.rs
@@ -1,3 +1,5 @@
+use super::ssz::{merkle_hash, TreeHash};
+
 #[derive(Clone, Debug, PartialEq)]
 pub struct ShardAndCommittee {
     pub shard: u16,
@@ -15,6 +17,26 @@ impl ShardAndCommittee {
     }
 }
 
+impl TreeHash for ShardAndCommittee {
+    // python sample code:
+    // def hash_shard_and_committee(val):
+    //     committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee])
+    //     return hash(val.shard_id.to_bytes(2, 'big') + committee)
+    fn tree_hash(&self) -> Vec<u8> {
+        let mut committee_ssz_items = Vec::new();
+        for c in &self.committee {
+            let mut h = (*c as u32).tree_hash();
+            h.resize(3, 0);
+            committee_ssz_items.push(h);
+        }
+        let mut result = Vec::new();
+        result.append(&mut self.shard.tree_hash());
+        result.append(&mut merkle_hash(&mut committee_ssz_items));
+
+        result.tree_hash()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -25,4 +47,15 @@ mod tests {
         assert_eq!(s.shard, 0);
         assert_eq!(s.committee.len(), 0);
     }
+
+    #[test]
+    fn test_shard_and_committee_tree_hash() {
+        let s = ShardAndCommittee {
+            shard: 1,
+            committee: vec![1, 2, 3],
+        };
+
+        // TODO: should test against a known hash value
+        assert_eq!(s.tree_hash().len(), 32);
+    }
 }
diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs
index 3a15baeec..3d4a57e20 100644
--- a/beacon_chain/types/src/validator_record.rs
+++ b/beacon_chain/types/src/validator_record.rs
@@ -1,4 +1,5 @@
 use super::bls::{Keypair, PublicKey};
+use super::ssz::TreeHash;
 use super::{Address, Hash256};
 
 #[derive(Debug, PartialEq, Clone, Copy)]
@@ -44,6 +45,46 @@ impl ValidatorRecord {
     }
 }
 
+impl TreeHash for ValidatorRecord {
+    /* python sample code:
+    def hash_validator_record(val):
+        return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \
+            val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \
+            val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big'))
+    */
+    fn tree_hash(&self) -> Vec<u8> {
+        // The serialized fields to be hashed should add up to 118 bytes in length;
+        // allocate that much once here.
+        let mut ssz = Vec::with_capacity(118);
+
+        // "val.pubkey.to_bytes(32, 'big')" logic
+        // TODO:
+        // probably all kinds of wrong here. Not sure how to convert the (ssz)
+        // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on
+        // PublicKey, returns a 192 byte array.
+        let pub_key_bytes = &mut self.pubkey.as_bytes();
+        pub_key_bytes.resize(32, 0);
+        ssz.append(pub_key_bytes);
+
+        ssz.append(&mut self.withdrawal_shard.tree_hash());
+        ssz.append(&mut self.withdrawal_address.tree_hash());
+        ssz.append(&mut self.randao_commitment.tree_hash());
+
+        // balance is a 64-bit number that serializes to 8 bytes.
+        // Right-padding here to resize to 16 bytes - not sure why
+        // a 16 byte value is used in the python code: "val.balance.to_bytes(16, 'big')"
+        let mut balance = self.balance.tree_hash();
+        balance.resize(16, 0);
+        ssz.append(&mut balance);
+
+        // TODO:
+        // ... + val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')
+        // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields
+
+        ssz.tree_hash()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -59,4 +100,13 @@ mod tests {
         assert_eq!(v.status, 0);
         assert_eq!(v.exit_slot, 0);
     }
+
+    #[test]
+    fn test_validator_record_tree_hash() {
+        let (v, _kp) = ValidatorRecord::zero_with_thread_rand_keypair();
+        let h = v.tree_hash();
+
+        // TODO: should check a known hash result value
+        assert_eq!(h.len(), 32);
+    }
 }
diff --git a/beacon_chain/utils/ssz/Cargo.toml b/beacon_chain/utils/ssz/Cargo.toml
index aa4dc5d72..ec9100927 100644
--- a/beacon_chain/utils/ssz/Cargo.toml
+++ b/beacon_chain/utils/ssz/Cargo.toml
@@ -6,3 +6,4 @@ authors = ["Paul Hauner "]
 [dependencies]
 bytes = "0.4.9"
 ethereum-types = "0.4.0"
+blake2-rfc = "0.2.18"
\ No newline at end of file
diff --git a/beacon_chain/utils/ssz/src/impl_encode.rs b/beacon_chain/utils/ssz/src/impl_encode.rs
index 3f366bdf3..c9ca8b006 100644
--- a/beacon_chain/utils/ssz/src/impl_encode.rs
+++ b/beacon_chain/utils/ssz/src/impl_encode.rs
@@ -1,7 +1,7 @@
 extern crate bytes;
 
 use self::bytes::{BufMut, BytesMut};
-use super::ethereum_types::H256;
+use super::ethereum_types::{Address, H256};
 use super::{Encodable, SszStream};
 
 /*
@@ -48,10 +48,18 @@ impl_encodable_for_uint!(usize, 64);
 
 impl Encodable for H256 {
     fn ssz_append(&self, s: &mut SszStream) {
+        assert_eq!(32, self.len());
         s.append_encoded_raw(&self.to_vec());
     }
 }
 
+impl Encodable for Address {
+    fn ssz_append(&self, s: &mut SszStream) {
+        assert_eq!(20, self.len());
+        s.append_encoded_raw(&self)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -64,6 +72,14 @@ mod tests {
         assert_eq!(ssz.drain(), vec![0; 32]);
     }
 
+    #[test]
+    fn test_ssz_encode_address() {
+        let h = Address::zero();
+        let mut ssz = SszStream::new();
+        ssz.append(&h);
+        assert_eq!(ssz.drain(), vec![0; 20]);
+    }
+
     #[test]
     fn test_ssz_encode_u8() {
         let x: u8 = 0;
diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs
new file mode 100644
index 000000000..c3fccb2bc
--- /dev/null
+++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs
@@ -0,0 +1,67 @@
+extern crate blake2_rfc;
+
+use self::blake2_rfc::blake2b::blake2b;
+
+use super::ethereum_types::{Address, H256};
+use super::{ssz_encode, TreeHash};
+
+// I haven't added tests for tree_hash implementations that simply pass
+// through to the ssz_encode lib, for which tests already exist. Do we want
+// tests anyway?
+
+impl TreeHash for u8 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self)
+    }
+}
+
+impl TreeHash for u16 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self)
+    }
+}
+
+impl TreeHash for u32 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self)
+    }
+}
+
+impl TreeHash for u64 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self)
+    }
+}
+
+impl TreeHash for Address {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self)
+    }
+}
+
+impl TreeHash for H256 {
+    fn tree_hash(&self) -> Vec<u8> {
+        ssz_encode(self)
+    }
+}
+
+// hash byte arrays
+impl TreeHash for [u8] {
+    fn tree_hash(&self) -> Vec<u8> {
+        hash(&self)
+    }
+}
+
+/**
+ * From the spec:
+ *   We define hash(x) as BLAKE2b-512(x)[0:32]
+ * From the python sample code:
+ *   return blake2b(x).digest()[:32]
+ *
+ * This was originally written for blake2s before it was changed to blake2b.
+ * Perhaps we should be using 'canonical_hash' in the hashing lib instead?
+ */
+fn hash(data: &[u8]) -> Vec<u8> {
+    let result = blake2b(32, &[], &data);
+    result.as_bytes().to_vec()
+}
diff --git a/beacon_chain/utils/ssz/src/lib.rs b/beacon_chain/utils/ssz/src/lib.rs
index f3a195e42..9f71e36f1 100644
--- a/beacon_chain/utils/ssz/src/lib.rs
+++ b/beacon_chain/utils/ssz/src/lib.rs
@@ -12,12 +12,15 @@ extern crate ethereum_types;
 
 pub mod decode;
 pub mod encode;
+pub mod tree_hash;
 
 mod impl_decode;
 mod impl_encode;
+mod impl_tree_hash;
 
 pub use decode::{decode_ssz, decode_ssz_list, Decodable, DecodeError};
 pub use encode::{Encodable, SszStream};
+pub use tree_hash::{merkle_hash, TreeHash};
 
 pub const LENGTH_BYTES: usize = 4;
 pub const MAX_LIST_SIZE: usize = 1 << (4 * 8);
diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs
index 41d7a9e4d..63b574049 100644
--- a/beacon_chain/utils/ssz/src/tree_hash.rs
+++ b/beacon_chain/utils/ssz/src/tree_hash.rs
@@ -1,6 +1,123 @@
-extern crate blake2_rfc;
+const CHUNKSIZE: usize = 128;
+const HASHSIZE: usize = 32;
 
-use self::blake2_rfc::blake2s::blake2s;
+pub trait TreeHash {
+    // Note: it would be nice to have a default trait implementation here,
+    // i.e. ssz_encode(self), but rust complains that it does not know
+    // the size of 'self'. Not sure if there is a way around this.
+
+    fn tree_hash(&self) -> Vec<u8>;
+}
+
+// python example: Note - I'm seeing some inconsistencies
+// between this and the 'Tree Hash' section of the SSZ spec,
+// so I imagine it will change.
+/* def merkle_hash(lst):
+    # Concatenate list into data
+    if len(lst[0]) != next_power_of_2(len(lst[0])):
+        lst = [extend_to_power_of_2(x) for x in lst]
+    data = b''.join(lst)
+    # Add padding
+    data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE))
+    assert len(data) % CHUNKSIZE == 0
+    # Store length (to compensate for non-bijectiveness of padding)
+    datalen = len(lst).to_bytes(32, 'big')
+    # Convert to chunks
+    chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)]
+    chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE]
+    for i in range(len(chunkz)//2 - 1, 0, -1):
+        chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1])
+    return hash(chunkz[1] + datalen) */
+
+/**
+ * Returns a 32 byte hash of 'list', a vector of byte vectors.
+ * Note that this will consume 'list'.
+ */
+pub fn merkle_hash(list: &mut Vec<Vec<u8>>) -> Vec<u8> {
+    // flatten list
+    let data = &mut list_to_blob(list);
+
+    // data length should be a multiple of CHUNKSIZE
+    assert_eq!(data.len() % CHUNKSIZE, 0);
+
+    // get data_len as bytes; it will be hashed together with the merkle root below
+    let dlen = data.len() as u64;
+    let data_len_bytes = &mut dlen.tree_hash();
+    data_len_bytes.resize(32, 0);
+
+    // merklize
+    //
+    // From the spec:
+    //   while len(chunkz) > 1:
+    //       if len(chunkz) % 2 == 1:
+    //           chunkz.append(b'\x00' * SSZ_CHUNK_SIZE)
+    //       chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)]
+    let mut mhash = hash_level(data, CHUNKSIZE);
+    while mhash.len() > HASHSIZE {
+        mhash = hash_level(&mut mhash, HASHSIZE);
+    }
+
+    assert_eq!(mhash.len(), HASHSIZE);
+
+    mhash.append(data_len_bytes);
+    mhash.tree_hash()
+}
+
+/**
+ * Takes a flat vector of bytes and hashes it in (chunk_size * 2) slices,
+ * returning a byte vector of hashes divisible by the 32 byte HASHSIZE.
+ */
+fn hash_level(data: &mut Vec<u8>, chunk_size: usize) -> Vec<u8> {
+    assert!(data.len() % chunk_size == 0);
+
+    let mut result: Vec<u8> = Vec::new();
+    for two_chunks in data.chunks(chunk_size * 2) {
+        if two_chunks.len() == chunk_size && data.len() > chunk_size {
+            // if there is only one chunk here, hash it with a zero-byte
+            // CHUNKSIZE vector
+            let mut c = two_chunks.to_vec();
+            c.append(&mut vec![0; CHUNKSIZE]);
+            result.append(&mut c.tree_hash());
+        } else {
+            result.append(&mut two_chunks.tree_hash());
+        }
+    }
+
+    result
+}
+
+fn list_to_blob(list: &mut Vec<Vec<u8>>) -> Vec<u8> {
+    let mut data_len = 0;
+    if list[0].len().is_power_of_two() == false {
+        for x in list.iter_mut() {
+            extend_to_power_of_2(x);
+            data_len += x.len();
+        }
+    }
+
+    // do we need padding?
+    let extend_by = if data_len % CHUNKSIZE > 0 {
+        CHUNKSIZE - (data_len % CHUNKSIZE)
+    } else {
+        0
+    };
+
+    // allocate buffer and append each list element (flatten the vec of vecs)
+    data_len += extend_by;
+    let mut data: Vec<u8> = Vec::with_capacity(data_len);
+    for x in list.iter_mut() {
+        data.append(x);
+    }
+
+    // add padding
+    let mut i = 0;
+    while i < extend_by {
+        data.push(0);
+        i += 1;
+    }
+
+    data
+}
 
 /**
  * Extends data length to a power of 2 by minimally right-zero-padding
@@ -9,16 +126,10 @@ fn extend_to_power_of_2(data: &mut Vec<u8>) {
     let len = data.len();
     let new_len = len.next_power_of_two();
     if new_len > len {
-        data.append(&mut vec![0; new_len - len]);
+        data.resize(new_len, 0);
     }
 }
 
-fn hash(data: Vec<u8>) -> Vec<u8> {
-    let result = blake2s(32, &[], data.as_slice());
-    result.as_bytes().to_vec()
-}
-
-// fn list_to_glob() {}
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -26,11 +137,20 @@ mod tests {
     #[test]
     fn test_extend_to_power_of_2() {
         let mut data = vec![1, 2, 3, 4, 5];
-
-        // an array length of 5 should be extended to
-        // a length of 8 (the next power of 2) by right
-        // padding it with 3 zeros
         extend_to_power_of_2(&mut data);
         assert_eq!(data, [1, 2, 3, 4, 5, 0, 0, 0]);
     }
+
+    #[test]
+    fn test_merkle_hash() {
+        let data1 = vec![1; 100];
+        let data2 = vec![2; 100];
+        let data3 = vec![3; 100];
+        let mut list = vec![data1, data2, data3];
+        let result = merkle_hash(&mut list);
+
+        // note: should test against a known hash value
+        assert_eq!(HASHSIZE, result.len());
+        println!("merkle_hash: {:?}", result);
+    }
 }
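
Usage sketch (not part of the diff above; it assumes the patched `ssz` crate is available as a dependency, and the variable names are illustrative only). Container types implement `TreeHash` field-by-field, as `ShardAndCommittee` and `ValidatorRecord` do above, while `merkle_hash` reduces a list of serialized elements to a single 32 byte root. Tracing the same data as `test_merkle_hash`:

    extern crate ssz;

    use ssz::merkle_hash;

    fn main() {
        // Three 100 byte leaves, as in test_merkle_hash above.
        // list_to_blob right-pads each leaf to the next power of two (128 bytes),
        // giving a 384 byte blob that is already a multiple of CHUNKSIZE (128).
        let mut list = vec![vec![1u8; 100], vec![2u8; 100], vec![3u8; 100]];

        // hash_level pass 1: 256 byte slices -> 2 hashes (the odd trailing
        // 128 byte chunk is hashed against a zeroed CHUNKSIZE block) -> 64 bytes.
        // hash_level pass 2: one 64 byte slice -> a single 32 byte merkle root,
        // which is finally hashed together with a 32 byte encoding of the data length.
        let root = merkle_hash(&mut list);
        assert_eq!(root.len(), 32);
    }

The trailing length encoding mirrors the python sample's "compensate for non-bijectiveness of padding" step, so two lists that pad to the same blob still hash differently.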