Use hardware acceleration for SHA256 (#2426)
## Proposed Changes

Modify the SHA256 implementation in `eth2_hashing` so that it switches between `ring` and `sha2` to take advantage of [x86_64 SHA extensions](https://en.wikipedia.org/wiki/Intel_SHA_extensions). The extensions are available on modern Intel and AMD CPUs, and seem to provide a considerable speed-up: on my Ryzen 5950X it dropped state tree hashing times by about 30% from 35ms to 25ms (on Prater).

## Additional Info

The extensions became available in the `sha2` crate [last year](https://www.reddit.com/r/rust/comments/hf2vcx/ann_rustcryptos_sha1_and_sha2_now_support/), and are not available in Ring, which uses a [pure Rust implementation of sha2](https://github.com/briansmith/ring/blob/main/src/digest/sha2.rs). Ring is faster on CPUs that lack the extensions so I've implemented a runtime switch to use `sha2` only when the extensions are available. The runtime switching seems to impose a minuscule penalty (see the benchmarks linked below).
This commit is contained in:
parent
a7b7134abb
commit
2c691af95b
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1941,6 +1941,7 @@ dependencies = [
|
||||
name = "eth2_hashing"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"cpufeatures",
|
||||
"lazy_static",
|
||||
"ring",
|
||||
"rustc-hex",
|
||||
@ -3736,6 +3737,7 @@ dependencies = [
|
||||
"directory",
|
||||
"env_logger 0.8.4",
|
||||
"environment",
|
||||
"eth2_hashing",
|
||||
"eth2_libp2p",
|
||||
"eth2_network_config",
|
||||
"futures",
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::Hash256;
|
||||
use eth2_hashing::{Context, SHA256};
|
||||
use eth2_hashing::{Context, Sha256Context};
|
||||
use std::cmp::max;
|
||||
|
||||
/// Return `p(index)` in a pseudorandom permutation `p` of `0...list_size-1` with ``seed`` as entropy.
|
||||
@ -54,7 +54,7 @@ fn do_round(seed: &[u8], index: usize, pivot: usize, round: u8, list_size: usize
|
||||
}
|
||||
|
||||
fn hash_with_round_and_position(seed: &[u8], round: u8, position: usize) -> Hash256 {
|
||||
let mut context = Context::new(&SHA256);
|
||||
let mut context = Context::new();
|
||||
|
||||
context.update(seed);
|
||||
context.update(&[round]);
|
||||
@ -64,17 +64,17 @@ fn hash_with_round_and_position(seed: &[u8], round: u8, position: usize) -> Hash
|
||||
*/
|
||||
context.update(&(position / 256).to_le_bytes()[0..4]);
|
||||
|
||||
let digest = context.finish();
|
||||
let digest = context.finalize();
|
||||
Hash256::from_slice(digest.as_ref())
|
||||
}
|
||||
|
||||
fn hash_with_round(seed: &[u8], round: u8) -> Hash256 {
|
||||
let mut context = Context::new(&SHA256);
|
||||
let mut context = Context::new();
|
||||
|
||||
context.update(seed);
|
||||
context.update(&[round]);
|
||||
|
||||
let digest = context.finish();
|
||||
let digest = context.finalize();
|
||||
Hash256::from_slice(digest.as_ref())
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::Hash256;
|
||||
use eth2_hashing::{Context, SHA256};
|
||||
use eth2_hashing::hash_fixed;
|
||||
use std::mem;
|
||||
|
||||
const SEED_SIZE: usize = 32;
|
||||
@ -31,12 +31,10 @@ impl Buf {
|
||||
/// Returns the new pivot. It is "raw" because it has not been reduced modulo the list size (this must be
|
||||
/// done by the caller).
|
||||
fn raw_pivot(&self) -> u64 {
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(&self.0[0..PIVOT_VIEW_SIZE]);
|
||||
let digest = context.finish();
|
||||
let digest = hash_fixed(&self.0[0..PIVOT_VIEW_SIZE]);
|
||||
|
||||
let mut bytes = [0; mem::size_of::<u64>()];
|
||||
bytes[..].copy_from_slice(&digest.as_ref()[0..mem::size_of::<u64>()]);
|
||||
bytes[..].copy_from_slice(&digest[0..mem::size_of::<u64>()]);
|
||||
u64::from_le_bytes(bytes)
|
||||
}
|
||||
|
||||
@ -47,10 +45,7 @@ impl Buf {
|
||||
|
||||
/// Hash the entire buffer.
|
||||
fn hash(&self) -> Hash256 {
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(&self.0[..]);
|
||||
let digest = context.finish();
|
||||
Hash256::from_slice(digest.as_ref())
|
||||
Hash256::from_slice(&hash_fixed(&self.0))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,8 +7,7 @@ pub use merkle_hasher::{Error, MerkleHasher};
|
||||
pub use merkleize_padded::merkleize_padded;
|
||||
pub use merkleize_standard::merkleize_standard;
|
||||
|
||||
use eth2_hashing::{Context, SHA256};
|
||||
use eth2_hashing::{ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
|
||||
use eth2_hashing::{hash_fixed, ZERO_HASHES, ZERO_HASHES_MAX_INDEX};
|
||||
|
||||
pub const BYTES_PER_CHUNK: usize = 32;
|
||||
pub const HASHSIZE: usize = 32;
|
||||
@ -39,11 +38,7 @@ pub fn merkle_root(bytes: &[u8], minimum_leaf_count: usize) -> Hash256 {
|
||||
let mut leaves = [0; HASHSIZE * 2];
|
||||
leaves[0..bytes.len()].copy_from_slice(bytes);
|
||||
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(&leaves);
|
||||
let digest = context.finish();
|
||||
|
||||
Hash256::from_slice(digest.as_ref())
|
||||
Hash256::from_slice(&hash_fixed(&leaves))
|
||||
} else {
|
||||
// If there are 3 or more leaves, use `MerkleHasher`.
|
||||
let mut hasher = MerkleHasher::with_leaves(leaves);
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::{get_zero_hash, Hash256, HASHSIZE};
|
||||
use eth2_hashing::{Context, Digest, SHA256};
|
||||
use eth2_hashing::{Context, Sha256Context, HASH_LEN};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::mem;
|
||||
|
||||
@ -15,7 +15,7 @@ pub enum Error {
|
||||
///
|
||||
/// Should be used as a left or right value for some node.
|
||||
enum Preimage<'a> {
|
||||
Digest(Digest),
|
||||
Digest([u8; HASH_LEN]),
|
||||
Slice(&'a [u8]),
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ struct HalfNode {
|
||||
impl HalfNode {
|
||||
/// Create a new half-node from the given `left` value.
|
||||
fn new(id: usize, left: Preimage) -> Self {
|
||||
let mut context = Context::new(&SHA256);
|
||||
let mut context = Context::new();
|
||||
context.update(left.as_bytes());
|
||||
|
||||
Self { context, id }
|
||||
@ -49,9 +49,9 @@ impl HalfNode {
|
||||
|
||||
/// Complete the half-node by providing a `right` value. Returns a digest of the left and right
|
||||
/// nodes.
|
||||
fn finish(mut self, right: Preimage) -> Digest {
|
||||
fn finish(mut self, right: Preimage) -> [u8; HASH_LEN] {
|
||||
self.context.update(right.as_bytes());
|
||||
self.context.finish()
|
||||
self.context.finalize()
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,7 +124,7 @@ pub struct MerkleHasher {
|
||||
/// Stores the nodes that are half-complete and awaiting a right node.
|
||||
///
|
||||
/// A smallvec of size 8 means we can hash a tree with 256 leaves without allocating on the
|
||||
/// heap. Each half-node is 224 bytes, so this smallvec may store 1,792 bytes on the stack.
|
||||
/// heap. Each half-node is 232 bytes, so this smallvec may store 1856 bytes on the stack.
|
||||
half_nodes: SmallVec8<HalfNode>,
|
||||
/// The depth of the tree that will be produced.
|
||||
///
|
||||
@ -368,7 +368,7 @@ mod test {
|
||||
fn context_size() {
|
||||
assert_eq!(
|
||||
mem::size_of::<HalfNode>(),
|
||||
216 + 8,
|
||||
232,
|
||||
"Halfnode size should be as expected"
|
||||
);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::{get_zero_hash, Hash256, BYTES_PER_CHUNK};
|
||||
use eth2_hashing::{hash, hash32_concat};
|
||||
use eth2_hashing::{hash32_concat, hash_fixed};
|
||||
|
||||
/// Merkleize `bytes` and return the root, optionally padding the tree out to `min_leaves` number of
|
||||
/// leaves.
|
||||
@ -79,7 +79,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 {
|
||||
// Hash two chunks, creating a parent chunk.
|
||||
let hash = match bytes.get(start..start + BYTES_PER_CHUNK * 2) {
|
||||
// All bytes are available, hash as usual.
|
||||
Some(slice) => hash(slice),
|
||||
Some(slice) => hash_fixed(slice),
|
||||
// Unable to get all the bytes, get a small slice and pad it out.
|
||||
None => {
|
||||
let mut preimage = bytes
|
||||
@ -87,7 +87,7 @@ pub fn merkleize_padded(bytes: &[u8], min_leaves: usize) -> Hash256 {
|
||||
.expect("`i` can only be larger than zero if there are bytes to read")
|
||||
.to_vec();
|
||||
preimage.resize(BYTES_PER_CHUNK * 2, 0);
|
||||
hash(&preimage)
|
||||
hash_fixed(&preimage)
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -8,12 +8,9 @@ description = "Hashing primitives used in Ethereum 2.0"
|
||||
|
||||
[dependencies]
|
||||
lazy_static = { version = "1.4.0", optional = true }
|
||||
|
||||
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
|
||||
ring = "0.16.19"
|
||||
|
||||
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
||||
sha2 = "0.9.1"
|
||||
sha2 = "0.9.5"
|
||||
cpufeatures = "0.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
rustc-hex = "2.1.0"
|
||||
|
@ -1,62 +1,206 @@
|
||||
//! Provides a simple hash function utilizing `ring::digest::SHA256`.
|
||||
//! Optimized SHA256 for use in Ethereum 2.0.
|
||||
//!
|
||||
//! The purpose of this crate is to provide an abstraction to whatever hash function Ethereum
|
||||
//! 2.0 is using. The hash function has been subject to change during the specification process, so
|
||||
//! defining it once in this crate makes it easy to replace.
|
||||
//! The initial purpose of this crate was to provide an abstraction over the hash function used in
|
||||
//! Ethereum 2.0. The hash function changed during the specification process, so defining it once in
|
||||
//! this crate made it easy to replace.
|
||||
//!
|
||||
//! Now this crate serves primarily as a wrapper over two SHA256 crates: `sha2` and `ring` –
|
||||
//! which it switches between at runtime based on the availability of SHA intrinsics.
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
pub use ring::digest::{digest, Context, Digest, SHA256};
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
use sha2::{Digest, Sha256};
|
||||
pub use self::DynamicContext as Context;
|
||||
use sha2::Digest;
|
||||
|
||||
#[cfg(feature = "zero_hash_cache")]
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
/// Returns the digest of `input`.
|
||||
///
|
||||
/// Uses `ring::digest::SHA256`.
|
||||
/// Length of a SHA256 hash in bytes.
|
||||
pub const HASH_LEN: usize = 32;
|
||||
|
||||
/// Returns the digest of `input` using the best available implementation.
|
||||
pub fn hash(input: &[u8]) -> Vec<u8> {
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
let h = digest(&SHA256, input).as_ref().into();
|
||||
DynamicImpl::best().hash(input)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
let h = Sha256::digest(input).as_ref().into();
|
||||
|
||||
h
|
||||
/// Hash function returning a fixed-size array (to save on allocations).
|
||||
///
|
||||
/// Uses the best available implementation based on CPU features.
|
||||
pub fn hash_fixed(input: &[u8]) -> [u8; HASH_LEN] {
|
||||
DynamicImpl::best().hash_fixed(input)
|
||||
}
|
||||
|
||||
/// Compute the hash of two slices concatenated.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if either `h1` or `h2` are not 32 bytes in length.
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
pub fn hash32_concat(h1: &[u8], h2: &[u8]) -> [u8; 32] {
|
||||
let mut context = Context::new(&SHA256);
|
||||
context.update(h1);
|
||||
context.update(h2);
|
||||
|
||||
let mut output = [0; 32];
|
||||
output[..].copy_from_slice(context.finish().as_ref());
|
||||
output
|
||||
let mut ctxt = DynamicContext::new();
|
||||
ctxt.update(h1);
|
||||
ctxt.update(h2);
|
||||
ctxt.finalize()
|
||||
}
|
||||
|
||||
/// Compute the hash of two slices concatenated.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if either `h1` or `h2` are not 32 bytes in length.
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
pub fn hash32_concat(h1: &[u8], h2: &[u8]) -> [u8; 32] {
|
||||
let mut preimage = [0; 64];
|
||||
preimage[0..32].copy_from_slice(h1);
|
||||
preimage[32..64].copy_from_slice(h2);
|
||||
/// Context trait for abstracting over implementation contexts.
|
||||
pub trait Sha256Context {
|
||||
fn new() -> Self;
|
||||
|
||||
let mut output = [0; 32];
|
||||
output[..].copy_from_slice(&hash(&preimage));
|
||||
fn update(&mut self, bytes: &[u8]);
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN];
|
||||
}
|
||||
|
||||
/// Top-level trait implemented by both `sha2` and `ring` implementations.
|
||||
pub trait Sha256 {
|
||||
type Context: Sha256Context;
|
||||
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8>;
|
||||
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN];
|
||||
}
|
||||
|
||||
/// Implementation of SHA256 using the `sha2` crate (fastest on CPUs with SHA extensions).
|
||||
struct Sha2CrateImpl;
|
||||
|
||||
impl Sha256Context for sha2::Sha256 {
|
||||
fn new() -> Self {
|
||||
sha2::Digest::new()
|
||||
}
|
||||
|
||||
fn update(&mut self, bytes: &[u8]) {
|
||||
sha2::Digest::update(self, bytes)
|
||||
}
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN] {
|
||||
sha2::Digest::finalize(self).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl Sha256 for Sha2CrateImpl {
|
||||
type Context = sha2::Sha256;
|
||||
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8> {
|
||||
Self::Context::digest(input).into_iter().collect()
|
||||
}
|
||||
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN] {
|
||||
Self::Context::digest(input).into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Implementation of SHA256 using the `ring` crate (fastest on CPUs without SHA extensions).
|
||||
pub struct RingImpl;
|
||||
|
||||
impl Sha256Context for ring::digest::Context {
|
||||
fn new() -> Self {
|
||||
Self::new(&ring::digest::SHA256)
|
||||
}
|
||||
|
||||
fn update(&mut self, bytes: &[u8]) {
|
||||
self.update(bytes)
|
||||
}
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN] {
|
||||
let mut output = [0; HASH_LEN];
|
||||
output.copy_from_slice(self.finish().as_ref());
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl Sha256 for RingImpl {
|
||||
type Context = ring::digest::Context;
|
||||
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8> {
|
||||
ring::digest::digest(&ring::digest::SHA256, input)
|
||||
.as_ref()
|
||||
.into()
|
||||
}
|
||||
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN] {
|
||||
let mut ctxt = Self::Context::new(&ring::digest::SHA256);
|
||||
ctxt.update(input);
|
||||
ctxt.finalize()
|
||||
}
|
||||
}
|
||||
|
||||
/// Default dynamic implementation that switches between available implementations.
|
||||
pub enum DynamicImpl {
|
||||
Sha2,
|
||||
Ring,
|
||||
}
|
||||
|
||||
// Runtime latch for detecting the availability of SHA extensions on x86_64.
|
||||
//
|
||||
// Inspired by the runtime switch within the `sha2` crate itself.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
cpufeatures::new!(x86_sha_extensions, "sha", "sse2", "ssse3", "sse4.1");
|
||||
|
||||
#[inline(always)]
|
||||
pub fn have_sha_extensions() -> bool {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
return x86_sha_extensions::get();
|
||||
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
return false;
|
||||
}
|
||||
|
||||
impl DynamicImpl {
|
||||
/// Choose the best available implementation based on the currently executing CPU.
|
||||
#[inline(always)]
|
||||
pub fn best() -> Self {
|
||||
if have_sha_extensions() {
|
||||
Self::Sha2
|
||||
} else {
|
||||
Self::Ring
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Sha256 for DynamicImpl {
|
||||
type Context = DynamicContext;
|
||||
|
||||
#[inline(always)]
|
||||
fn hash(&self, input: &[u8]) -> Vec<u8> {
|
||||
match self {
|
||||
Self::Sha2 => Sha2CrateImpl.hash(input),
|
||||
Self::Ring => RingImpl.hash(input),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn hash_fixed(&self, input: &[u8]) -> [u8; HASH_LEN] {
|
||||
match self {
|
||||
Self::Sha2 => Sha2CrateImpl.hash_fixed(input),
|
||||
Self::Ring => RingImpl.hash_fixed(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Context encapsulating all implementation contexts.
|
||||
///
|
||||
/// This enum ends up being 8 bytes larger than the largest inner context.
|
||||
pub enum DynamicContext {
|
||||
Sha2(sha2::Sha256),
|
||||
Ring(ring::digest::Context),
|
||||
}
|
||||
|
||||
impl Sha256Context for DynamicContext {
|
||||
fn new() -> Self {
|
||||
match DynamicImpl::best() {
|
||||
DynamicImpl::Sha2 => Self::Sha2(Sha256Context::new()),
|
||||
DynamicImpl::Ring => Self::Ring(Sha256Context::new()),
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, bytes: &[u8]) {
|
||||
match self {
|
||||
Self::Sha2(ctxt) => Sha256Context::update(ctxt, bytes),
|
||||
Self::Ring(ctxt) => Sha256Context::update(ctxt, bytes),
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(self) -> [u8; HASH_LEN] {
|
||||
match self {
|
||||
Self::Sha2(ctxt) => Sha256Context::finalize(ctxt),
|
||||
Self::Ring(ctxt) => Sha256Context::finalize(ctxt),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The max index that can be used with `ZERO_HASHES`.
|
||||
#[cfg(feature = "zero_hash_cache")]
|
||||
|
@ -16,8 +16,6 @@ modern = ["bls/supranational-force-adx"]
|
||||
milagro = ["bls/milagro"]
|
||||
# Support minimal spec (used for testing only).
|
||||
spec-minimal = []
|
||||
# Support spec v0.12 (used by Medalla testnet).
|
||||
spec-v12 = []
|
||||
|
||||
[dependencies]
|
||||
beacon_node = { "path" = "../beacon_node" }
|
||||
@ -26,6 +24,7 @@ slog = { version = "2.5.2", features = ["max_level_trace"] }
|
||||
sloggers = "1.0.1"
|
||||
types = { "path" = "../consensus/types" }
|
||||
bls = { path = "../crypto/bls" }
|
||||
eth2_hashing = "0.1.0"
|
||||
clap = "2.33.3"
|
||||
env_logger = "0.8.2"
|
||||
logging = { path = "../common/logging" }
|
||||
|
@ -7,6 +7,7 @@ use clap::{App, Arg, ArgMatches};
|
||||
use clap_utils::flags::DISABLE_MALLOC_TUNING_FLAG;
|
||||
use env_logger::{Builder, Env};
|
||||
use environment::EnvironmentBuilder;
|
||||
use eth2_hashing::have_sha_extensions;
|
||||
use eth2_network_config::{Eth2NetworkConfig, DEFAULT_HARDCODED_NETWORK};
|
||||
use lighthouse_version::VERSION;
|
||||
use malloc_utils::configure_memory_allocator;
|
||||
@ -43,10 +44,13 @@ fn main() {
|
||||
.long_version(
|
||||
format!(
|
||||
"{}\n\
|
||||
BLS Library: {}\n\
|
||||
Specs: mainnet (true), minimal ({}), v0.12.3 ({})",
|
||||
VERSION.replace("Lighthouse/", ""), bls_library_name(),
|
||||
cfg!(feature = "spec-minimal"), cfg!(feature = "spec-v12"),
|
||||
BLS library: {}\n\
|
||||
SHA256 hardware acceleration: {}\n\
|
||||
Specs: mainnet (true), minimal ({})",
|
||||
VERSION.replace("Lighthouse/", ""),
|
||||
bls_library_name(),
|
||||
have_sha_extensions(),
|
||||
cfg!(feature = "spec-minimal"),
|
||||
).as_str()
|
||||
)
|
||||
.arg(
|
||||
@ -205,7 +209,7 @@ fn main() {
|
||||
EthSpecId::Mainnet => run(EnvironmentBuilder::mainnet(), &matches, testnet_config),
|
||||
#[cfg(feature = "spec-minimal")]
|
||||
EthSpecId::Minimal => run(EnvironmentBuilder::minimal(), &matches, testnet_config),
|
||||
#[cfg(any(not(feature = "spec-minimal")))]
|
||||
#[cfg(not(feature = "spec-minimal"))]
|
||||
other => {
|
||||
eprintln!(
|
||||
"Eth spec `{}` is not supported by this build of Lighthouse",
|
||||
|
Loading…
Reference in New Issue
Block a user