Introducing ethash

2015-02-28 14:58:37 -05:00 · 2015-02-28 14:58:37 -05:00 · de9f79133f
commit de9f79133f
parent 080823bdee
52 changed files with 17425 additions and 70 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,6 +6,7 @@

 /tmp
 */**/*un~
+*/**/*.test
 *un~
 .DS_Store
 */**/.DS_Store
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@ -15,6 +15,11 @@
 			"Comment": "null-15",
 			"Rev": "12e4b4183793ac4b061921e7980845e750679fd0"
 		},
+		{
+			"ImportPath": "github.com/ethereum/ethash",
+			"Comment": "v17-23-g2561e13",
+			"Rev": "2561e1322a7e8e3d4a2cc903c44b1e96340bcb27"
+		},
 		{
 			"ImportPath": "github.com/ethereum/serpent-go",
 			"Rev": "5767a0dbd759d313df3f404dadb7f98d7ab51443"
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore
@ -0,0 +1,5 @@
+.idea/
+.DS_Store
+*/**/*un~
+.vagrant/
+cpp-build/
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt
@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 2.8.2)
+project(ethash)
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
+set(ETHHASH_LIBS ethash)
+
+if (WIN32 AND WANT_CRYPTOPP)
+	add_subdirectory(cryptopp)
+endif()
+
+add_subdirectory(libethash)
+add_subdirectory(libethash-cl EXCLUDE_FROM_ALL)
+add_subdirectory(benchmark EXCLUDE_FROM_ALL)
+add_subdirectory(test EXCLUDE_FROM_ALL)
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt
@ -0,0 +1,53 @@
+include_directories(..)
+
+set(CMAKE_BUILD_TYPE Release)
+
+if (MSVC)
+	add_definitions("/openmp")
+endif()
+
+if (NOT MPI_FOUND)
+    find_package(MPI)
+endif()
+
+if (NOT CRYPTOPP_FOUND)
+	find_package(CryptoPP 5.6.2)
+endif()
+
+if (CRYPTOPP_FOUND)
+	add_definitions(-DWITH_CRYPTOPP)
+endif()
+
+if (NOT OpenCL_FOUND)
+	find_package(OpenCL)
+endif()
+if (OpenCL_FOUND)
+	add_definitions(-DWITH_OPENCL)
+	include_directories(${OpenCL_INCLUDE_DIRS})
+	list(APPEND FILES ethash_cl_miner.cpp ethash_cl_miner.h)
+endif()
+
+if (MPI_FOUND)
+    include_directories(${MPI_INCLUDE_PATH})
+    add_executable (Benchmark_MPI_FULL benchmark.cpp)
+    target_link_libraries (Benchmark_MPI_FULL ${ETHHASH_LIBS} ${MPI_LIBRARIES})
+    SET_TARGET_PROPERTIES(Benchmark_MPI_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DFULL -DMPI")
+
+    add_executable (Benchmark_MPI_LIGHT benchmark.cpp)
+    target_link_libraries (Benchmark_MPI_LIGHT ${ETHHASH_LIBS} ${MPI_LIBRARIES})
+    SET_TARGET_PROPERTIES(Benchmark_MPI_LIGHT PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DMPI")
+endif()
+
+add_executable (Benchmark_FULL benchmark.cpp)
+target_link_libraries (Benchmark_FULL ${ETHHASH_LIBS})
+SET_TARGET_PROPERTIES(Benchmark_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DFULL")
+
+add_executable (Benchmark_LIGHT benchmark.cpp)
+target_link_libraries (Benchmark_LIGHT ${ETHHASH_LIBS})
+
+if (OpenCL_FOUND)
+	add_executable (Benchmark_CL benchmark.cpp)
+	target_link_libraries (Benchmark_CL ${ETHHASH_LIBS} ethash-cl)
+	SET_TARGET_PROPERTIES(Benchmark_CL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DOPENCL")
+endif()
+
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp
@ -0,0 +1,260 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file benchmark.cpp
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <libethash/ethash.h>
+#include <libethash/util.h>
+#ifdef OPENCL
+#include <libethash-cl/ethash_cl_miner.h>
+#endif
+#include <vector>
+#include <algorithm>
+
+#ifdef WITH_CRYPTOPP
+#include <libethash/SHA3_cryptopp.h>
+#include <string>
+
+#else
+#include "libethash/sha3.h"
+#endif // WITH_CRYPTOPP
+
+#undef min
+#undef max
+
+#if defined(OPENCL)
+const unsigned trials = 1024*1024*32;
+#elif defined(FULL)
+const unsigned trials = 1024*1024/8;
+#else
+const unsigned trials = 1024*1024/1024;
+#endif
+uint8_t g_hashes[1024*32];
+
+static char nibbleToChar(unsigned nibble)
+{
+	return (char) ((nibble >= 10 ? 'a'-10 : '0') + nibble);
+}
+
+static uint8_t charToNibble(char chr)
+{
+	if (chr >= '0' && chr <= '9')
+	{
+		return (uint8_t) (chr - '0');
+	}
+	if (chr >= 'a' && chr <= 'z')
+	{
+		return (uint8_t) (chr - 'a' + 10);
+	}
+	if (chr >= 'A' && chr <= 'Z')
+	{
+		return (uint8_t) (chr - 'A' + 10);
+	}
+	return 0;
+}
+
+static std::vector<uint8_t> hexStringToBytes(char const* str)
+{
+	std::vector<uint8_t> bytes(strlen(str) >> 1);
+	for (unsigned i = 0; i != bytes.size(); ++i)
+	{
+		bytes[i] = charToNibble(str[i*2 | 0]) << 4;
+		bytes[i] |= charToNibble(str[i*2 | 1]);
+	}
+	return bytes;
+}
+
+static std::string bytesToHexString(uint8_t const* bytes, unsigned size)
+{
+	std::string str;
+	for (unsigned i = 0; i != size; ++i)
+	{
+		str += nibbleToChar(bytes[i] >> 4);
+		str += nibbleToChar(bytes[i] & 0xf);
+	}
+	return str;
+}
+
+extern "C" int main(void)
+{
+	// params for ethash
+	ethash_params params;
+	ethash_params_init(&params, 0);
+	//params.full_size = 262147 * 4096;	// 1GBish;
+	//params.full_size = 32771 * 4096;	// 128MBish;
+	//params.full_size = 8209 * 4096;	// 8MBish;
+	//params.cache_size = 8209*4096;
+	//params.cache_size = 2053*4096;
+	uint8_t seed[32], previous_hash[32];
+
+	memcpy(seed, hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466").data(), 32);
+	memcpy(previous_hash, hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470").data(), 32);
+	
+	// allocate page aligned buffer for dataset
+#ifdef FULL
+	void* full_mem_buf = malloc(params.full_size + 4095);
+	void* full_mem = (void*)((uintptr_t(full_mem_buf) + 4095) & ~4095);
+#endif
+	void* cache_mem_buf = malloc(params.cache_size + 63);
+	void* cache_mem = (void*)((uintptr_t(cache_mem_buf) + 63) & ~63);
+
+	ethash_cache cache;
+	cache.mem = cache_mem;
+	
+	// compute cache or full data
+	{
+		clock_t startTime = clock();
+		ethash_mkcache(&cache, &params, seed);
+		clock_t time = clock() - startTime;
+
+		uint8_t cache_hash[32];
+		SHA3_256(cache_hash, (uint8_t const*)cache_mem, params.cache_size);
+		debugf("ethash_mkcache: %ums, sha3: %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(cache_hash,sizeof(cache_hash)).data());
+
+		// print a couple of test hashes
+		{
+			const clock_t startTime = clock();
+			ethash_return_value hash;
+			ethash_light(&hash, &cache, &params, previous_hash, 0);
+			const clock_t time = clock() - startTime;
+			debugf("ethash_light test: %ums, %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data());
+		}
+
+		#ifdef FULL
+			startTime = clock();
+			ethash_compute_full_data(full_mem, &params, &cache);
+			time = clock() - startTime;
+			debugf("ethash_compute_full_data: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC));
+		#endif // FULL
+	}
+
+#ifdef OPENCL
+	ethash_cl_miner miner;
+	{
+		const clock_t startTime = clock();
+		if (!miner.init(params, seed))
+			exit(-1);
+		const clock_t time = clock() - startTime;
+        debugf("ethash_cl_miner init: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC));
+	}
+#endif
+
+
+#ifdef FULL
+    {
+        const clock_t startTime = clock();
+		ethash_return_value hash;
+        ethash_full(&hash, full_mem, &params, previous_hash, 0);
+        const clock_t time = clock() - startTime;
+        debugf("ethash_full test: %uns, %s\n", (unsigned)((time*1000000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data());
+    }
+#endif
+
+#ifdef OPENCL
+	// validate 1024 hashes against CPU
+	miner.hash(g_hashes, previous_hash, 0, 1024);
+	for (unsigned i = 0; i != 1024; ++i)
+	{
+		ethash_return_value hash;
+		ethash_light(&hash, &cache, &params, previous_hash, i);
+		if (memcmp(hash.result, g_hashes + 32*i, 32) != 0)
+		{
+			debugf("nonce %u failed: %s %s\n", i, bytesToHexString(g_hashes + 32*i, 32).c_str(), bytesToHexString(hash.result, 32).c_str());
+			static unsigned c = 0;
+			if (++c == 16)
+			{
+				exit(-1);
+			}
+		}
+	}
+#endif
+
+
+	clock_t startTime = clock();
+	unsigned hash_count = trials;
+	
+	#ifdef OPENCL
+	{
+		struct search_hook : ethash_cl_miner::search_hook
+		{
+			unsigned hash_count;
+			std::vector<uint64_t> nonce_vec;
+
+			virtual bool found(uint64_t const* nonces, uint32_t count)
+			{
+				nonce_vec.assign(nonces, nonces + count);
+				return false;
+			}
+
+			virtual bool searched(uint64_t start_nonce, uint32_t count)
+			{
+				// do nothing
+				hash_count += count;
+				return hash_count >= trials;
+			}
+		};
+		search_hook hook;
+		hook.hash_count = 0;
+
+		miner.search(previous_hash, 0x000000ffffffffff, hook);
+
+		for (unsigned i = 0; i != hook.nonce_vec.size(); ++i)
+		{
+			uint64_t nonce = hook.nonce_vec[i];
+			ethash_return_value hash;
+			ethash_light(&hash, &cache, &params, previous_hash, nonce);
+			debugf("found: %.8x%.8x -> %s\n", unsigned(nonce>>32), unsigned(nonce), bytesToHexString(hash.result, 32).c_str());
+		}
+
+		hash_count = hook.hash_count;
+	}
+	#else
+	{
+		//#pragma omp parallel for
+		for (int nonce = 0; nonce < trials; ++nonce)
+		{
+			ethash_return_value hash;
+			#ifdef FULL
+				ethash_full(&hash, full_mem, &params, previous_hash, nonce);
+			#else
+				ethash_light(&hash, &cache, &params, previous_hash, nonce);
+			#endif // FULL
+		}
+	}
+	#endif
+	
+	clock_t time = std::max((clock_t)1u, clock() - startTime);
+	
+	unsigned read_size = ACCESSES * MIX_BYTES;
+	debugf(			
+		"hashrate: %8u, bw: %6u MB/s\n",
+		(unsigned)(((uint64_t)hash_count*CLOCKS_PER_SEC)/time),
+		(unsigned)((((uint64_t)hash_count*read_size*CLOCKS_PER_SEC)/time) / (1024*1024))
+		);
+
+	free(cache_mem_buf);
+#ifdef FULL
+	free(full_mem_buf);
+#endif
+
+	return 0;
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake
@ -0,0 +1,108 @@
+# Module for locating the Crypto++ encryption library.
+#
+# Customizable variables:
+#   CRYPTOPP_ROOT_DIR
+#     This variable points to the CryptoPP root directory. On Windows the
+#     library location typically will have to be provided explicitly using the
+#     -D command-line option. The directory should include the include/cryptopp,
+#     lib and/or bin sub-directories.
+#
+# Read-only variables:
+#   CRYPTOPP_FOUND
+#     Indicates whether the library has been found.
+#
+#   CRYPTOPP_INCLUDE_DIRS
+#     Points to the CryptoPP include directory.
+#
+#   CRYPTOPP_LIBRARIES
+#     Points to the CryptoPP libraries that should be passed to
+#     target_link_libararies.
+#
+#
+# Copyright (c) 2012 Sergiu Dotenco
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+INCLUDE (FindPackageHandleStandardArgs)
+
+FIND_PATH (CRYPTOPP_ROOT_DIR
+  NAMES cryptopp/cryptlib.h include/cryptopp/cryptlib.h
+  PATHS ENV CRYPTOPPROOT
+  DOC "CryptoPP root directory")
+
+# Re-use the previous path:
+FIND_PATH (CRYPTOPP_INCLUDE_DIR
+  NAMES cryptopp/cryptlib.h
+  HINTS ${CRYPTOPP_ROOT_DIR}
+  PATH_SUFFIXES include
+  DOC "CryptoPP include directory")
+
+FIND_LIBRARY (CRYPTOPP_LIBRARY_DEBUG
+  NAMES cryptlibd cryptoppd
+  HINTS ${CRYPTOPP_ROOT_DIR}
+  PATH_SUFFIXES lib
+  DOC "CryptoPP debug library")
+
+FIND_LIBRARY (CRYPTOPP_LIBRARY_RELEASE
+  NAMES cryptlib cryptopp
+  HINTS ${CRYPTOPP_ROOT_DIR}
+  PATH_SUFFIXES lib
+  DOC "CryptoPP release library")
+
+IF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE)
+  SET (CRYPTOPP_LIBRARY
+    optimized ${CRYPTOPP_LIBRARY_RELEASE}
+    debug ${CRYPTOPP_LIBRARY_DEBUG} CACHE DOC "CryptoPP library")
+ELSEIF (CRYPTOPP_LIBRARY_RELEASE)
+  SET (CRYPTOPP_LIBRARY ${CRYPTOPP_LIBRARY_RELEASE} CACHE DOC
+    "CryptoPP library")
+ENDIF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE)
+
+IF (CRYPTOPP_INCLUDE_DIR)
+  SET (_CRYPTOPP_VERSION_HEADER ${CRYPTOPP_INCLUDE_DIR}/cryptopp/config.h)
+
+  IF (EXISTS ${_CRYPTOPP_VERSION_HEADER})
+    FILE (STRINGS ${_CRYPTOPP_VERSION_HEADER} _CRYPTOPP_VERSION_TMP REGEX
+      "^#define CRYPTOPP_VERSION[ \t]+[0-9]+$")
+
+    STRING (REGEX REPLACE
+      "^#define CRYPTOPP_VERSION[ \t]+([0-9]+)" "\\1" _CRYPTOPP_VERSION_TMP
+      ${_CRYPTOPP_VERSION_TMP})
+
+    STRING (REGEX REPLACE "([0-9]+)[0-9][0-9]" "\\1" CRYPTOPP_VERSION_MAJOR
+      ${_CRYPTOPP_VERSION_TMP})
+    STRING (REGEX REPLACE "[0-9]([0-9])[0-9]" "\\1" CRYPTOPP_VERSION_MINOR
+      ${_CRYPTOPP_VERSION_TMP})
+    STRING (REGEX REPLACE "[0-9][0-9]([0-9])" "\\1" CRYPTOPP_VERSION_PATCH
+      ${_CRYPTOPP_VERSION_TMP})
+
+    SET (CRYPTOPP_VERSION_COUNT 3)
+    SET (CRYPTOPP_VERSION
+      ${CRYPTOPP_VERSION_MAJOR}.${CRYPTOPP_VERSION_MINOR}.${CRYPTOPP_VERSION_PATCH})
+  ENDIF (EXISTS ${_CRYPTOPP_VERSION_HEADER})
+ENDIF (CRYPTOPP_INCLUDE_DIR)
+
+SET (CRYPTOPP_INCLUDE_DIRS ${CRYPTOPP_INCLUDE_DIR})
+SET (CRYPTOPP_LIBRARIES ${CRYPTOPP_LIBRARY})
+
+MARK_AS_ADVANCED (CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY CRYPTOPP_LIBRARY_DEBUG
+  CRYPTOPP_LIBRARY_RELEASE)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS (CryptoPP REQUIRED_VARS CRYPTOPP_ROOT_DIR
+  CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY VERSION_VAR CRYPTOPP_VERSION)
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake
@ -0,0 +1,91 @@
+#
+#  This file taken from FindOpenCL project @ http://gitorious.com/findopencl
+#
+# - Try to find OpenCL
+# This module tries to find an OpenCL implementation on your system. It supports
+# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too.
+#
+# Once done this will define
+#  OPENCL_FOUND        - system has OpenCL
+#  OPENCL_INCLUDE_DIRS  - the OpenCL include directory
+#  OPENCL_LIBRARIES    - link these to use OpenCL
+#
+# WIN32 should work, but is untested
+
+FIND_PACKAGE( PackageHandleStandardArgs )
+
+SET (OPENCL_VERSION_STRING "0.1.0")
+SET (OPENCL_VERSION_MAJOR 0)
+SET (OPENCL_VERSION_MINOR 1)
+SET (OPENCL_VERSION_PATCH 0)
+
+IF (APPLE)
+
+  FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
+  FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
+  FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX")
+
+ELSE (APPLE)
+
+	IF (WIN32)
+	
+	    FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h)
+	    FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp)
+	
+	    # The AMD SDK currently installs both x86 and x86_64 libraries
+	    # This is only a hack to find out architecture
+	    IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
+	    	SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64")
+			SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86_64")
+	    ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
+	    	SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86")
+	   		SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86")
+	    ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
+
+	    # find out if the user asked for a 64-bit build, and use the corresponding 
+	    # 64 or 32 bit NVIDIA library paths to the search:
+	    STRING(REGEX MATCH "Win64" ISWIN64 ${CMAKE_GENERATOR})
+	    IF("${ISWIN64}" STREQUAL "Win64") 
+	    	FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/x64)
+	    ELSE("${ISWIN64}" STREQUAL "Win64") 
+	    	FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/Win32)
+	    ENDIF("${ISWIN64}" STREQUAL "Win64") 
+
+	    GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
+	    
+	    # On Win32 search relative to the library
+	    FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include)
+	    FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include)
+	
+	ELSE (WIN32)
+
+            # Unix style platforms
+            FIND_LIBRARY(OPENCL_LIBRARIES OpenCL
+              ENV LD_LIBRARY_PATH
+            )
+
+            GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH)
+            GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
+
+            # The AMD SDK currently does not place its headers
+            # in /usr/include, therefore also search relative
+            # to the library
+            FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include")
+            FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include")
+
+	ENDIF (WIN32)
+
+ENDIF (APPLE)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
+
+IF( _OPENCL_CPP_INCLUDE_DIRS )
+	SET( OPENCL_HAS_CPP_BINDINGS TRUE )
+	LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} )
+	# This is often the same, so clean up
+	LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS )
+ENDIF( _OPENCL_CPP_INCLUDE_DIRS )
+
+MARK_AS_ADVANCED(
+  OPENCL_INCLUDE_DIRS
+)
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt
@ -0,0 +1,13 @@
+set(LIBRARY cryptopp)
+
+include_directories(../../cryptopp)
+
+# todo, subset
+file(GLOB HEADERS "../../cryptopp/*.h")
+file(GLOB SOURCE "../../cryptopp/*.cpp")
+
+add_library(${LIBRARY} ${HEADERS} ${SOURCE})
+
+set(CRYPTOPP_INCLUDE_DIRS "../.." PARENT_SCOPE)
+set(CRYPTOPP_LIBRARIES ${LIBRARY} PARENT_SCOPE)
+set(CRYPTOPP_FOUND TRUE PARENT_SCOPE)
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
@ -0,0 +1,296 @@
+package ethash
+
+/*
+#cgo CFLAGS: -std=gnu99 -Wall
+#include "libethash/ethash.h"
+#include "libethash/util.c"
+#include "libethash/internal.c"
+#include "libethash/sha3.c"
+*/
+import "C"
+
+import (
+	"bytes"
+	"encoding/binary"
+	"log"
+	"math/big"
+	"math/rand"
+	"sync"
+	"time"
+	"unsafe"
+
+	"github.com/ethereum/go-ethereum/logger"
+	"github.com/ethereum/go-ethereum/pow"
+)
+
+var powlogger = logger.NewLogger("POW")
+
+type DAG struct {
+	SeedBlockNum uint64
+	dag          unsafe.Pointer // full GB of memory for dag
+}
+
+type ParamsAndCache struct {
+	params       *C.ethash_params
+	cache        *C.ethash_cache
+	SeedBlockNum uint64
+}
+
+type Ethash struct {
+	turbo          bool
+	HashRate       int64
+	chainManager   pow.ChainManager
+	dag            *DAG
+	paramsAndCache *ParamsAndCache
+	nextdag        unsafe.Pointer
+	ret            *C.ethash_return_value
+	dagMutex       *sync.Mutex
+	cacheMutex     *sync.Mutex
+}
+
+func parseNonce(nonce []byte) (uint64, error) {
+	nonceBuf := bytes.NewBuffer(nonce)
+	nonceInt, err := binary.ReadUvarint(nonceBuf)
+	if err != nil {
+		return 0, err
+	}
+	return nonceInt, nil
+}
+
+const epochLength uint64 = 30000
+
+func getSeedBlockNum(blockNum uint64) uint64 {
+	var seedBlockNum uint64 = 0
+	if blockNum >= 2*epochLength {
+		seedBlockNum = ((blockNum / epochLength) - 1) * epochLength
+	}
+	return seedBlockNum
+}
+
+func makeParamsAndCache(chainManager pow.ChainManager, blockNum uint64) *ParamsAndCache {
+	seedBlockNum := getSeedBlockNum(blockNum)
+	paramsAndCache := &ParamsAndCache{
+		params:       new(C.ethash_params),
+		cache:        new(C.ethash_cache),
+		SeedBlockNum: seedBlockNum,
+	}
+	C.ethash_params_init(paramsAndCache.params, C.uint32_t(seedBlockNum))
+	paramsAndCache.cache.mem = C.malloc(paramsAndCache.params.cache_size)
+	seedHash := chainManager.GetBlockByNumber(seedBlockNum).Header().Hash()
+	log.Println("Params", paramsAndCache.params)
+
+	log.Println("Making Cache")
+	start := time.Now()
+	C.ethash_mkcache(paramsAndCache.cache, paramsAndCache.params, (*C.uint8_t)((unsafe.Pointer)(&seedHash[0])))
+	log.Println("Took:", time.Since(start))
+	return paramsAndCache
+}
+
+func (pow *Ethash) updateCache() {
+	pow.cacheMutex.Lock()
+	seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64())
+	if pow.paramsAndCache.SeedBlockNum != seedNum {
+		pow.paramsAndCache = makeParamsAndCache(pow.chainManager, pow.chainManager.CurrentBlock().NumberU64())
+	}
+	pow.cacheMutex.Unlock()
+}
+
+func makeDAG(p *ParamsAndCache) *DAG {
+	d := &DAG{
+		dag:          C.malloc(p.params.full_size),
+		SeedBlockNum: p.SeedBlockNum,
+	}
+	C.ethash_compute_full_data(d.dag, p.params, p.cache)
+	return d
+}
+
+func (pow *Ethash) updateDAG() {
+	pow.cacheMutex.Lock()
+	pow.dagMutex.Lock()
+
+	seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64())
+	if pow.dag == nil || pow.dag.SeedBlockNum != seedNum {
+		pow.dag = nil
+		log.Println("Making Dag")
+		start := time.Now()
+		pow.dag = makeDAG(pow.paramsAndCache)
+		log.Println("Took:", time.Since(start))
+	}
+
+	pow.dagMutex.Unlock()
+	pow.cacheMutex.Unlock()
+}
+
+func New(chainManager pow.ChainManager) *Ethash {
+	return &Ethash{
+		turbo:          false,
+		paramsAndCache: makeParamsAndCache(chainManager, chainManager.CurrentBlock().NumberU64()),
+		chainManager:   chainManager,
+		dag:            nil,
+		ret:            new(C.ethash_return_value),
+		cacheMutex:     new(sync.Mutex),
+		dagMutex:       new(sync.Mutex),
+	}
+}
+
+func (pow *Ethash) DAGSize() uint64 {
+	return uint64(pow.paramsAndCache.params.full_size)
+}
+
+func (pow *Ethash) CacheSize() uint64 {
+	return uint64(pow.paramsAndCache.params.cache_size)
+}
+
+func (pow *Ethash) GetSeedHash(blockNum uint64) []byte {
+	return pow.chainManager.GetBlockByNumber(getSeedBlockNum(blockNum)).Header().Hash()
+}
+
+func (pow *Ethash) Stop() {
+	pow.cacheMutex.Lock()
+	pow.dagMutex.Lock()
+	if pow.paramsAndCache.cache != nil {
+		C.free(pow.paramsAndCache.cache.mem)
+	}
+	if pow.dag != nil {
+		C.free(pow.dag.dag)
+	}
+	pow.dagMutex.Unlock()
+	pow.cacheMutex.Unlock()
+}
+
+func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
+	pow.updateDAG()
+
+	// Not very elegant, multiple mining instances are not supported
+	pow.dagMutex.Lock()
+	pow.cacheMutex.Lock()
+	defer pow.cacheMutex.Unlock()
+	defer pow.dagMutex.Unlock()
+
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	miningHash := block.HashNoNonce()
+	diff := block.Difficulty()
+	log.Println("difficulty", diff)
+	i := int64(0)
+	start := time.Now().UnixNano()
+	t := time.Now()
+
+	nonce := uint64(r.Int63())
+
+	for {
+		select {
+		case <-stop:
+			powlogger.Infoln("Breaking from mining")
+			pow.HashRate = 0
+			pow.dagMutex.Unlock()
+			return nil, nil, nil
+		default:
+			i++
+
+			if time.Since(t) > (1 * time.Second) {
+				elapsed := time.Now().UnixNano() - start
+				hashes := ((float64(1e9) / float64(elapsed)) * float64(i)) / 1000
+				pow.HashRate = int64(hashes)
+				powlogger.Infoln("Hashing @", pow.HashRate, "khash")
+
+				t = time.Now()
+			}
+
+			cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+			cnonce := C.uint64_t(nonce)
+			log.Printf("seed hash, nonce: %x %x\n", miningHash, nonce)
+			// pow.hash is the output/return of ethash_full
+			C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce)
+			res := C.ethash_check_difficulty((*C.uint8_t)(&pow.ret.result[0]), (*C.uint8_t)(unsafe.Pointer(&diff.Bytes()[0])))
+			if res == 1 {
+				mixDigest := C.GoBytes(unsafe.Pointer(&pow.ret.mix_hash[0]), 32)
+				// We don't really nead 32 bytes here
+				buf := make([]byte, 32)
+				binary.PutUvarint(buf, nonce)
+				return buf, mixDigest, pow.GetSeedHash(block.NumberU64())
+			}
+			nonce += 1
+		}
+
+		if !pow.turbo {
+			time.Sleep(20 * time.Microsecond)
+		}
+	}
+}
+
+func (pow *Ethash) Verify(block pow.Block) bool {
+	// Make sure the SeedHash is set correctly
+	if bytes.Compare(block.SeedHash(), pow.GetSeedHash(block.NumberU64())) != 0 {
+		log.Println("Block had wrong SeedHash")
+		log.Println("Expected: ", pow.GetSeedHash(block.NumberU64()))
+		log.Println("Actual: ", block.SeedHash())
+		return false
+	}
+
+	nonceInt, err := parseNonce(block.Nonce())
+	if err != nil {
+		log.Println("nonce to int err:", err)
+		return false
+	}
+	return pow.verify(block.HashNoNonce(), block.MixDigest(), block.Difficulty(), block.NumberU64(), nonceInt)
+}
+
+func (pow *Ethash) verify(hash []byte, mixDigest []byte, difficulty *big.Int, blockNum uint64, nonce uint64) bool {
+	// First check: make sure header, mixDigest, nonce are correct without hitting the DAG
+	// This is to prevent DOS attacks
+	chash := (*C.uint8_t)(unsafe.Pointer(&hash))
+	cnonce := C.uint64_t(nonce)
+	cmixDigest := (*C.uint8_t)(unsafe.Pointer(&mixDigest))
+	cdifficulty := (*C.uint8_t)(unsafe.Pointer(&difficulty.Bytes()[0]))
+	if C.ethash_quick_check_difficulty(chash, cnonce, cmixDigest, cdifficulty) != 1 {
+		log.Println("Failed to pass quick check.  Are you sure that the mix digest is correct?")
+		return false
+	}
+
+	var pAc *ParamsAndCache
+	// If its an old block (doesn't use the current cache)
+	// get the cache for it but don't update (so we don't need the mutex)
+	// Otherwise, it's the current block or a future.
+	// If current, updateCache will do nothing.
+	if getSeedBlockNum(blockNum) < pow.paramsAndCache.SeedBlockNum {
+		pAc = makeParamsAndCache(pow.chainManager, blockNum)
+	} else {
+		pow.updateCache()
+		pow.cacheMutex.Lock()
+		defer pow.cacheMutex.Unlock()
+		pAc = pow.paramsAndCache
+	}
+
+	C.ethash_light(pow.ret, pAc.cache, pAc.params, chash, cnonce)
+	res := C.ethash_check_difficulty((*C.uint8_t)(unsafe.Pointer(&pow.ret.result[0])), cdifficulty)
+	return res == 1
+}
+
+func (pow *Ethash) GetHashrate() int64 {
+	return pow.HashRate
+}
+
+func (pow *Ethash) Turbo(on bool) {
+	pow.turbo = on
+}
+
+func (pow *Ethash) FullHash(nonce uint64, miningHash []byte) []byte {
+	pow.updateDAG()
+	pow.dagMutex.Lock()
+	defer pow.dagMutex.Unlock()
+	cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+	cnonce := C.uint64_t(nonce)
+	log.Println("seed hash, nonce:", miningHash, nonce)
+	// pow.hash is the output/return of ethash_full
+	C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce)
+	ghash_full := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32)
+	return ghash_full
+}
+
+func (pow *Ethash) LightHash(nonce uint64, miningHash []byte) []byte {
+	cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+	cnonce := C.uint64_t(nonce)
+	C.ethash_light(pow.ret, pow.paramsAndCache.cache, pow.paramsAndCache.params, cMiningHash, cnonce)
+	ghash_light := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32)
+	return ghash_light
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt
@ -0,0 +1,12 @@
+set(LIBRARY ethash-cl)
+set(CMAKE_BUILD_TYPE Release)
+
+if (NOT OPENCL_FOUND)
+	find_package(OpenCL)
+endif()
+if (OPENCL_FOUND)
+	include_directories(${OPENCL_INCLUDE_DIRS})
+	include_directories(..)
+	add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h)
+	TARGET_LINK_LIBRARIES(${LIBRARY} ${OPENCL_LIBRARIES} ethash)
+endif()
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp
@ -0,0 +1,754 @@
+/*
+  This file is part of c-ethash.
+
+  c-ethash is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  c-ethash is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file ethash_cl_miner.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @date 2015
+*/
+
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include <assert.h>
+#include <queue>
+#include "ethash_cl_miner.h"
+#include <libethash/util.h>
+
+#undef min
+#undef max
+
+#define HASH_BYTES 32
+
+static char const ethash_inner_code[] = R"(
+
+// author Tim Hughes <tim@twistedfury.com>
+// Tested on Radeon HD 7850
+// Hashrate: 15940347 hashes/s
+// Bandwidth: 124533 MB/s
+// search kernel should fit in <= 84 VGPRS (3 wavefronts)
+
+#define THREADS_PER_HASH (128 / 16)
+#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
+
+#define FNV_PRIME	0x01000193
+
+__constant uint2 const Keccak_f1600_RC[24] = {
+	(uint2)(0x00000001, 0x00000000),
+	(uint2)(0x00008082, 0x00000000),
+	(uint2)(0x0000808a, 0x80000000),
+	(uint2)(0x80008000, 0x80000000),
+	(uint2)(0x0000808b, 0x00000000),
+	(uint2)(0x80000001, 0x00000000),
+	(uint2)(0x80008081, 0x80000000),
+	(uint2)(0x00008009, 0x80000000),
+	(uint2)(0x0000008a, 0x00000000),
+	(uint2)(0x00000088, 0x00000000),
+	(uint2)(0x80008009, 0x00000000),
+	(uint2)(0x8000000a, 0x00000000),
+	(uint2)(0x8000808b, 0x00000000),
+	(uint2)(0x0000008b, 0x80000000),
+	(uint2)(0x00008089, 0x80000000),
+	(uint2)(0x00008003, 0x80000000),
+	(uint2)(0x00008002, 0x80000000),
+	(uint2)(0x00000080, 0x80000000),
+	(uint2)(0x0000800a, 0x00000000),
+	(uint2)(0x8000000a, 0x80000000),
+	(uint2)(0x80008081, 0x80000000),
+	(uint2)(0x00008080, 0x80000000),
+	(uint2)(0x80000001, 0x00000000),
+	(uint2)(0x80008008, 0x80000000),
+};
+
+void keccak_f1600_round(uint2* a, uint r, uint out_size)
+{
+   #if !__ENDIAN_LITTLE__
+	for (uint i = 0; i != 25; ++i)
+		a[i] = a[i].yx;
+   #endif
+
+	uint2 b[25];
+	uint2 t;
+
+	// Theta
+	b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
+	b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
+	b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
+	b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
+	b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
+	t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
+	a[0] ^= t;
+	a[5] ^= t;
+	a[10] ^= t;
+	a[15] ^= t;
+	a[20] ^= t;
+	t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
+	a[1] ^= t;
+	a[6] ^= t;
+	a[11] ^= t;
+	a[16] ^= t;
+	a[21] ^= t;
+	t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
+	a[2] ^= t;
+	a[7] ^= t;
+	a[12] ^= t;
+	a[17] ^= t;
+	a[22] ^= t;
+	t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
+	a[3] ^= t;
+	a[8] ^= t;
+	a[13] ^= t;
+	a[18] ^= t;
+	a[23] ^= t;
+	t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
+	a[4] ^= t;
+	a[9] ^= t;
+	a[14] ^= t;
+	a[19] ^= t;
+	a[24] ^= t;
+
+	// Rho Pi
+	b[0] = a[0];
+	b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
+	b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
+	b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
+	b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
+	b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
+	b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
+	b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
+	b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
+	b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
+	b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
+	b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
+	b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
+	b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
+	b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
+	b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
+	b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
+	b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
+	b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
+	b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
+	b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
+	b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
+	b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
+	b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
+	b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
+
+	// Chi
+	a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
+	a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
+	a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
+	a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
+	if (out_size >= 4)
+	{
+		a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
+		a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
+		a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
+		a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
+		a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
+		if (out_size >= 8)
+		{
+			a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
+			a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
+			a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
+			a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
+			a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
+			a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
+			a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
+			a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
+			a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
+			a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
+			a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
+			a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
+			a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
+			a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
+			a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
+			a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
+		}
+	}
+
+	// Iota
+	a[0] ^= Keccak_f1600_RC[r];
+
+   #if !__ENDIAN_LITTLE__
+	for (uint i = 0; i != 25; ++i)
+		a[i] = a[i].yx;
+   #endif
+}
+
+void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
+{
+	for (uint i = in_size; i != 25; ++i)
+	{
+		a[i] = 0;
+	}
+#if __ENDIAN_LITTLE__
+	a[in_size] ^= 0x0000000000000001;
+	a[24-out_size*2] ^= 0x8000000000000000;
+#else
+	a[in_size] ^= 0x0100000000000000;
+	a[24-out_size*2] ^= 0x0000000000000080;
+#endif
+
+	// Originally I unrolled the first and last rounds to interface
+	// better with surrounding code, however I haven't done this
+	// without causing the AMD compiler to blow up the VGPR usage.
+	uint r = 0;
+	do
+	{
+		// This dynamic branch stops the AMD compiler unrolling the loop
+		// and additionally saves about 33% of the VGPRs, enough to gain another
+		// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
+		// massage out of the compiler. It doesn't really seem to matter how
+		// much we try and help the compiler save VGPRs because it seems to throw
+		// that information away, hence the implementation of keccak here
+		// doesn't bother.
+		if (isolate) 
+		{
+			keccak_f1600_round((uint2*)a, r++, 25);
+		}
+	}
+	while (r < 23);
+	
+	// final round optimised for digest size
+	keccak_f1600_round((uint2*)a, r++, out_size);
+}
+
+#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
+
+#define countof(x) (sizeof(x) / sizeof(x[0]))
+
+uint fnv(uint x, uint y)
+{
+	return x * FNV_PRIME ^ y;
+}
+
+uint4 fnv4(uint4 x, uint4 y)
+{
+	return x * FNV_PRIME ^ y;
+}
+
+uint fnv_reduce(uint4 v)
+{
+	return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
+}
+
+typedef union
+{
+	ulong ulongs[32 / sizeof(ulong)];
+	uint uints[32 / sizeof(uint)];
+} hash32_t;
+
+typedef union
+{
+	ulong ulongs[64 / sizeof(ulong)];
+	uint4 uint4s[64 / sizeof(uint4)];
+} hash64_t;
+
+typedef union
+{
+	uint uints[128 / sizeof(uint)];
+	uint4 uint4s[128 / sizeof(uint4)];
+} hash128_t;
+
+hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
+{
+	hash64_t init;
+	uint const init_size = countof(init.ulongs);
+	uint const hash_size = countof(header->ulongs);
+	
+	// sha3_512(header .. nonce)
+	ulong state[25];
+	copy(state, header->ulongs, hash_size);
+	state[hash_size] = nonce;
+	keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
+
+	copy(init.ulongs, state, init_size);
+	return init;
+}
+
+uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
+{
+	uint4 mix = init;
+
+	// share init0
+	if (thread_id == 0)
+		*share = mix.x;
+	barrier(CLK_LOCAL_MEM_FENCE);
+	uint init0 = *share;
+
+	uint a = 0;
+	do
+	{
+		bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
+
+		#pragma unroll
+		for (uint i = 0; i != 4; ++i)
+		{
+			if (update_share)
+			{
+				uint m[4] = { mix.x, mix.y, mix.z, mix.w };
+				*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
+			}
+			barrier(CLK_LOCAL_MEM_FENCE);
+
+			mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
+		}
+	}
+	while ((a += 4) != (ACCESSES & isolate));
+
+	return fnv_reduce(mix);
+}
+
+hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
+{
+	ulong state[25];
+
+	hash32_t hash;
+	uint const hash_size = countof(hash.ulongs);
+	uint const init_size = countof(init->ulongs);
+	uint const mix_size = countof(mix->ulongs);
+
+	// keccak_256(keccak_512(header..nonce) .. mix);
+	copy(state, init->ulongs, init_size);
+	copy(state + init_size, mix->ulongs, mix_size);
+	keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
+
+	// copy out
+	copy(hash.ulongs, state, hash_size);
+	return hash;
+}
+
+hash32_t compute_hash_simple(
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong nonce,
+	uint isolate
+	)
+{
+	hash64_t init = init_hash(g_header, nonce, isolate);
+
+	hash128_t mix;
+	for (uint i = 0; i != countof(mix.uint4s); ++i)
+	{
+		mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
+	}
+	
+	uint mix_val = mix.uints[0];
+	uint init0 = mix.uints[0];
+	uint a = 0;
+	do
+	{
+		uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
+		uint n = (a+1) % countof(mix.uints);
+
+		#pragma unroll
+		for (uint i = 0; i != countof(mix.uints); ++i)
+		{
+			mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
+			mix_val = i == n ? mix.uints[i] : mix_val;
+		}
+	}
+	while (++a != (ACCESSES & isolate));
+
+	// reduce to output
+	hash32_t fnv_mix;
+	for (uint i = 0; i != countof(fnv_mix.uints); ++i)
+	{
+		fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
+	}
+	
+	return final_hash(&init, &fnv_mix, isolate);
+}
+
+typedef union
+{
+	struct
+	{
+		hash64_t init;
+		uint pad; // avoid lds bank conflicts
+	};
+	hash32_t mix;
+} compute_hash_share;
+
+hash32_t compute_hash(
+	__local compute_hash_share* share,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong nonce,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+
+	// Compute one init hash per work item.
+	hash64_t init = init_hash(g_header, nonce, isolate);
+
+	// Threads work together in this phase in groups of 8.
+	uint const thread_id = gid % THREADS_PER_HASH;
+	uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
+
+	hash32_t mix;
+	uint i = 0;
+	do
+	{
+		// share init with other threads
+		if (i == thread_id)
+			share[hash_id].init = init;
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
+
+		share[hash_id].mix.uints[thread_id] = thread_mix;
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		if (i == thread_id)
+			mix = share[hash_id].mix;
+		barrier(CLK_LOCAL_MEM_FENCE);
+	}
+	while (++i != (THREADS_PER_HASH & isolate));
+
+	return final_hash(&init, &mix, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash_simple(
+	__global hash32_t* g_hashes,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+	g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search_simple(
+	__global volatile uint* restrict g_output,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	ulong target,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+	hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+
+	if (hash.ulongs[countof(hash.ulongs)-1] < target)
+	{
+		uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+		g_output[slot] = gid;
+	}
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash(
+	__global hash32_t* g_hashes,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+	g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search(
+	__global volatile uint* restrict g_output,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	ulong target,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+	hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+
+	if (hash.ulongs[countof(hash.ulongs)-1] < target)
+	{
+		uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+		g_output[slot] = gid;
+	}
+}
+
+)";
+
+static void add_definition(std::string& source, char const* id, unsigned value)
+{
+	char buf[256];
+	sprintf(buf, "#define %s %uu\n", id, value);
+	source.insert(source.begin(), buf, buf + strlen(buf));
+}
+
+ethash_cl_miner::ethash_cl_miner()
+{
+}
+
+bool ethash_cl_miner::init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size)
+{
+	// store params
+	m_params = params;
+
+	// get all platforms
+    std::vector<cl::Platform> platforms;
+    cl::Platform::get(&platforms);
+	if (platforms.empty())
+	{
+		debugf("No OpenCL platforms found.\n");
+		return false;
+	}
+
+	// use default platform
+	debugf("Using platform: %s\n", platforms[0].getInfo<CL_PLATFORM_NAME>().c_str());
+
+    // get GPU device of the default platform
+    std::vector<cl::Device> devices;
+    platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+    if (devices.empty())
+	{
+		debugf("No OpenCL devices found.\n");
+		return false;
+	}
+
+	// use default device
+	cl::Device& device = devices[0];
+	debugf("Using device: %s\n", device.getInfo<CL_DEVICE_NAME>().c_str());
+
+	// create context
+	m_context = cl::Context({device});
+	m_queue = cl::CommandQueue(m_context, device);
+
+	// use requested workgroup size, but we require multiple of 8
+	m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
+
+	// patch source code
+	std::string code = ethash_inner_code;
+	add_definition(code, "GROUP_SIZE", m_workgroup_size);
+	add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / MIX_BYTES));
+	add_definition(code, "ACCESSES", ACCESSES);
+	add_definition(code, "MAX_OUTPUTS", c_max_search_results);
+	//debugf("%s", code.c_str());
+
+	// create miner OpenCL program
+	cl::Program::Sources sources;
+	sources.push_back({code.c_str(), code.size()});
+
+	cl::Program program(m_context, sources);
+	try
+	{
+		program.build({device});
+	}
+	catch (cl::Error err)
+	{
+		debugf("%s\n", program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
+		return false;
+	}
+	m_hash_kernel = cl::Kernel(program, "ethash_hash");
+	m_search_kernel = cl::Kernel(program, "ethash_search");
+
+	// create buffer for dag
+	m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size);
+	
+	// create buffer for header
+	m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
+
+	// compute dag on CPU
+	{
+		void* cache_mem = malloc(params.cache_size + 63);
+		ethash_cache cache;
+		cache.mem = (void*)(((uintptr_t)cache_mem + 63) & ~63);
+		ethash_mkcache(&cache, &params, seed);
+
+		// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
+		void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size);
+		ethash_compute_full_data(dag_ptr, &params, &cache);
+		m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
+
+		free(cache_mem);
+	}
+
+	// create mining buffers
+	for (unsigned i = 0; i != c_num_buffers; ++i)
+	{
+		m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, 32*c_hash_batch_size);
+		m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
+	}
+	return true;
+}
+
+void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
+{
+	struct pending_batch
+	{
+		unsigned base;
+		unsigned count;
+		unsigned buf;
+	};
+	std::queue<pending_batch> pending;
+	
+	// update header constant buffer
+	m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
+
+	/*
+	__kernel void ethash_combined_hash(
+		__global hash32_t* g_hashes,
+		__constant hash32_t const* g_header,
+		__global hash128_t const* g_dag,
+		ulong start_nonce,
+		uint isolate
+		)
+	*/
+	m_hash_kernel.setArg(1, m_header);
+	m_hash_kernel.setArg(2, m_dag);
+	m_hash_kernel.setArg(3, nonce);
+	m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
+
+	unsigned buf = 0;
+	for (unsigned i = 0; i < count || !pending.empty(); )
+	{
+		// how many this batch
+		if (i < count)
+		{
+			unsigned const this_count = std::min(count - i, c_hash_batch_size);
+			unsigned const batch_count = std::max(this_count, m_workgroup_size);
+
+			// supply output hash buffer to kernel
+			m_hash_kernel.setArg(0, m_hash_buf[buf]);
+
+			// execute it!
+			clock_t start_time = clock();
+			m_queue.enqueueNDRangeKernel(
+				m_hash_kernel,
+				cl::NullRange,
+				cl::NDRange(batch_count),
+				cl::NDRange(m_workgroup_size)
+				);
+			m_queue.flush();
+		
+			pending.push({i, this_count, buf});
+			i += this_count;
+			buf = (buf + 1) % c_num_buffers;
+		}
+
+		// read results
+		if (i == count || pending.size() == c_num_buffers)
+		{
+			pending_batch const& batch = pending.front();
+
+			// could use pinned host pointer instead, but this path isn't that important.
+			uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * HASH_BYTES);
+			memcpy(ret + batch.base*HASH_BYTES, hashes, batch.count*HASH_BYTES);
+			m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);
+
+			pending.pop();
+		}
+	}
+}
+
+
+void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
+{
+	struct pending_batch
+	{
+		uint64_t start_nonce;
+		unsigned buf;
+	};
+	std::queue<pending_batch> pending;
+
+	static uint32_t const c_zero = 0;
+
+	// update header constant buffer
+	m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
+	for (unsigned i = 0; i != c_num_buffers; ++i)
+	{
+		m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
+	}
+	cl::Event pre_return_event;
+	m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
+
+	/*
+	__kernel void ethash_combined_search(
+		__global hash32_t* g_hashes,			// 0
+		__constant hash32_t const* g_header,	// 1
+		__global hash128_t const* g_dag,		// 2
+		ulong start_nonce,						// 3
+		ulong target,							// 4
+		uint isolate							// 5
+	)
+	*/
+	m_search_kernel.setArg(1, m_header);
+	m_search_kernel.setArg(2, m_dag);
+
+	// pass these to stop the compiler unrolling the loops
+	m_search_kernel.setArg(4, target);
+	m_search_kernel.setArg(5, ~0u);
+
+
+	unsigned buf = 0;
+	for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
+	{
+		// supply output buffer to kernel
+		m_search_kernel.setArg(0, m_search_buf[buf]);
+		m_search_kernel.setArg(3, start_nonce);
+
+		// execute it!
+		m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
+		
+		pending.push({start_nonce, buf});
+		buf = (buf + 1) % c_num_buffers;
+
+		// read results
+		if (pending.size() == c_num_buffers)
+		{
+			pending_batch const& batch = pending.front();
+
+			// could use pinned host pointer instead
+			uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
+			unsigned num_found = std::min(results[0], c_max_search_results);
+
+			uint64_t nonces[c_max_search_results];
+			for (unsigned i = 0; i != num_found; ++i)
+			{
+				nonces[i] = batch.start_nonce + results[i+1];
+			}
+			
+			m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
+			
+			bool exit = num_found && hook.found(nonces, num_found);
+			exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
+			if (exit)
+				break;
+
+			pending.pop();
+		}
+	}
+
+	// not safe to return until this is ready
+	pre_return_event.wait();
+}
+
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h
@ -0,0 +1,43 @@
+#pragma once
+
+#define __CL_ENABLE_EXCEPTIONS 
+#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+#include "cl.hpp"
+#include <time.h>
+#include <libethash/ethash.h>
+
+class ethash_cl_miner
+{
+public:
+	struct search_hook
+	{
+		// reports progress, return true to abort
+		virtual bool found(uint64_t const* nonces, uint32_t count) = 0;
+		virtual bool searched(uint64_t start_nonce, uint32_t count) = 0;
+	};
+
+public:
+	ethash_cl_miner();
+
+	bool init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size = 64);
+
+	void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
+	void search(uint8_t const* header, uint64_t target, search_hook& hook);
+
+private:
+	static unsigned const c_max_search_results = 63;
+	static unsigned const c_num_buffers = 2;
+	static unsigned const c_hash_batch_size = 1024;
+	static unsigned const c_search_batch_size = 1024*256;
+
+	ethash_params m_params;
+	cl::Context m_context;
+	cl::CommandQueue m_queue;
+	cl::Kernel m_hash_kernel;
+	cl::Kernel m_search_kernel;
+	cl::Buffer m_dag;
+	cl::Buffer m_header;
+	cl::Buffer m_hash_buf[c_num_buffers];
+	cl::Buffer m_search_buf[c_num_buffers];
+	unsigned m_workgroup_size;
+};
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt
@ -0,0 +1,15 @@
+find_package(CUDA)
+ 
+# Pass options to NVCC
+
+ 
+if (CUDA_FOUND)
+set(CUDA_NVCC_FLAGS "   -gencode;arch=compute_30,code=sm_30;
+                        -gencode;arch=compute_20,code=sm_20;
+                        -gencode;arch=compute_11,code=sm_11;
+                        -gencode;arch=compute_12,code=sm_12;
+                        -gencode;arch=compute_13,code=sm_13;")
+cuda_add_executable(
+    ethash-cuda
+    libethash.cu)
+endif()
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu
@ -0,0 +1,879 @@
+/*
+	Copyright 2009 NVIDIA Corporation.  All rights reserved.
+
+	NOTICE TO LICENSEE:   
+
+	This source code and/or documentation ("Licensed Deliverables") are subject 
+	to NVIDIA intellectual property rights under U.S. and international Copyright 
+	laws.  
+
+	These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL 
+	to NVIDIA and is being provided under the terms and conditions of a form of 
+	NVIDIA software license agreement by and between NVIDIA and Licensee ("License 
+	Agreement") or electronically accepted by Licensee.  Notwithstanding any terms 
+	or conditions to the contrary in the License Agreement, reproduction or 
+	disclosure of the Licensed Deliverables to any third party without the express 
+	written consent of NVIDIA is prohibited.     
+
+	NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, 
+	NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED 
+	DELIVERABLES FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED 
+	WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE 
+	LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
+	NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   NOTWITHSTANDING ANY 
+	TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL 
+	NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, 
+	OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,	WHETHER 
+	IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF 
+	OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.  
+
+	U.S. Government End Users. These Licensed Deliverables are a "commercial item" 
+	as that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
+	"commercial computer  software"  and "commercial computer software documentation" 
+	as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the 
+	U.S. Government only as a commercial end item.  Consistent with 48 C.F.R.12.212 
+	and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government 
+	End Users acquire the Licensed Deliverables with only those rights set forth 
+	herein. 
+
+	Any use of the Licensed Deliverables in individual and commercial software must 
+	include, in the user documentation and internal comments to the code, the above 
+	Disclaimer and U.S. Government End Users Notice.
+ */
+
+/*
+ *	cuPrintf.cu
+ *
+ *	This is a printf command callable from within a kernel. It is set
+ *	up so that output is sent to a memory buffer, which is emptied from
+ *	the host side - but only after a cudaThreadSynchronize() on the host.
+ *
+ *	Currently, there is a limitation of around 200 characters of output
+ *	and no more than 10 arguments to a single cuPrintf() call. Issue
+ *	multiple calls if longer format strings are required.
+ *
+ *	It requires minimal setup, and is *NOT* optimised for performance.
+ *	For example, writes are not coalesced - this is because there is an
+ *	assumption that people will not want to printf from every single one
+ *	of thousands of threads, but only from individual threads at a time.
+ *
+ *	Using this is simple - it requires one host-side call to initialise
+ *	everything, and then kernels can call cuPrintf at will. Sample code
+ *	is the easiest way to demonstrate:
+ *
+	#include "cuPrintf.cu"
+ 	
+	__global__ void testKernel(int val)
+	{
+		cuPrintf("Value is: %d\n", val);
+	}
+
+	int main()
+	{
+		cudaPrintfInit();
+		testKernel<<< 2, 3 >>>(10);
+		cudaPrintfDisplay(stdout, true);
+		cudaPrintfEnd();
+        return 0;
+	}
+ *
+ *	See the header file, "cuPrintf.cuh" for more info, especially
+ *	arguments to cudaPrintfInit() and cudaPrintfDisplay();
+ */
+
+#ifndef CUPRINTF_CU
+#define CUPRINTF_CU
+
+#include "cuPrintf.cuh"
+#if __CUDA_ARCH__ > 100      // Atomics only used with > sm_10 architecture
+#include <sm_11_atomic_functions.h>
+#endif
+
+// This is the smallest amount of memory, per-thread, which is allowed.
+// It is also the largest amount of space a single printf() can take up
+const static int CUPRINTF_MAX_LEN = 256;
+
+// This structure is used internally to track block/thread output restrictions.
+typedef struct __align__(8) {
+	int threadid;				// CUPRINTF_UNRESTRICTED for unrestricted
+	int blockid;				// CUPRINTF_UNRESTRICTED for unrestricted
+} cuPrintfRestriction;
+
+// The main storage is in a global print buffer, which has a known
+// start/end/length. These are atomically updated so it works as a
+// circular buffer.
+// Since the only control primitive that can be used is atomicAdd(),
+// we cannot wrap the pointer as such. The actual address must be
+// calculated from printfBufferPtr by mod-ing with printfBufferLength.
+// For sm_10 architecture, we must subdivide the buffer per-thread
+// since we do not even have an atomic primitive.
+__constant__ static char *globalPrintfBuffer = NULL;         // Start of circular buffer (set up by host)
+__constant__ static int printfBufferLength = 0;              // Size of circular buffer (set up by host)
+__device__ static cuPrintfRestriction restrictRules;         // Output restrictions
+__device__ volatile static char *printfBufferPtr = NULL;     // Current atomically-incremented non-wrapped offset
+
+// This is the header preceeding all printf entries.
+// NOTE: It *must* be size-aligned to the maximum entity size (size_t)
+typedef struct __align__(8) {
+    unsigned short magic;                   // Magic number says we're valid
+    unsigned short fmtoffset;               // Offset of fmt string into buffer
+    unsigned short blockid;                 // Block ID of author
+    unsigned short threadid;                // Thread ID of author
+} cuPrintfHeader;
+
+// Special header for sm_10 architecture
+#define CUPRINTF_SM10_MAGIC   0xC810        // Not a valid ascii character
+typedef struct __align__(16) {
+    unsigned short magic;                   // sm_10 specific magic number
+    unsigned short unused;
+    unsigned int thread_index;              // thread ID for this buffer
+    unsigned int thread_buf_len;            // per-thread buffer length
+    unsigned int offset;                    // most recent printf's offset
+} cuPrintfHeaderSM10;
+
+
+// Because we can't write an element which is not aligned to its bit-size,
+// we have to align all sizes and variables on maximum-size boundaries.
+// That means sizeof(double) in this case, but we'll use (long long) for
+// better arch<1.3 support
+#define CUPRINTF_ALIGN_SIZE      sizeof(long long)
+
+// All our headers are prefixed with a magic number so we know they're ready
+#define CUPRINTF_SM11_MAGIC  (unsigned short)0xC811        // Not a valid ascii character
+
+
+//
+//  getNextPrintfBufPtr
+//
+//  Grabs a block of space in the general circular buffer, using an
+//  atomic function to ensure that it's ours. We handle wrapping
+//  around the circular buffer and return a pointer to a place which
+//  can be written to.
+//
+//  Important notes:
+//      1. We always grab CUPRINTF_MAX_LEN bytes
+//      2. Because of 1, we never worry about wrapping around the end
+//      3. Because of 1, printfBufferLength *must* be a factor of CUPRINTF_MAX_LEN
+//
+//  This returns a pointer to the place where we own.
+//
+__device__ static char *getNextPrintfBufPtr()
+{
+    // Initialisation check
+    if(!printfBufferPtr)
+        return NULL;
+
+	// Thread/block restriction check
+	if((restrictRules.blockid != CUPRINTF_UNRESTRICTED) && (restrictRules.blockid != (blockIdx.x + gridDim.x*blockIdx.y)))
+		return NULL;
+	if((restrictRules.threadid != CUPRINTF_UNRESTRICTED) && (restrictRules.threadid != (threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z)))
+		return NULL;
+
+	// Conditional section, dependent on architecture
+#if __CUDA_ARCH__ == 100
+    // For sm_10 architectures, we have no atomic add - this means we must split the
+    // entire available buffer into per-thread blocks. Inefficient, but what can you do.
+    int thread_count = (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z);
+    int thread_index = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z +
+                       (blockIdx.x + gridDim.x*blockIdx.y) * (blockDim.x * blockDim.y * blockDim.z);
+    
+    // Find our own block of data and go to it. Make sure the per-thread length
+	// is a precise multiple of CUPRINTF_MAX_LEN, otherwise we risk size and
+	// alignment issues! We must round down, of course.
+    unsigned int thread_buf_len = printfBufferLength / thread_count;
+	thread_buf_len &= ~(CUPRINTF_MAX_LEN-1);
+
+	// We *must* have a thread buffer length able to fit at least two printfs (one header, one real)
+	if(thread_buf_len < (CUPRINTF_MAX_LEN * 2))
+		return NULL;
+
+	// Now address our section of the buffer. The first item is a header.
+    char *myPrintfBuffer = globalPrintfBuffer + (thread_buf_len * thread_index);
+    cuPrintfHeaderSM10 hdr = *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer;
+    if(hdr.magic != CUPRINTF_SM10_MAGIC)
+    {
+        // If our header is not set up, initialise it
+        hdr.magic = CUPRINTF_SM10_MAGIC;
+        hdr.thread_index = thread_index;
+        hdr.thread_buf_len = thread_buf_len;
+        hdr.offset = 0;         // Note we start at 0! We pre-increment below.
+        *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer = hdr;       // Write back the header
+
+        // For initial setup purposes, we might need to init thread0's header too
+        // (so that cudaPrintfDisplay() below will work). This is only run once.
+        cuPrintfHeaderSM10 *tophdr = (cuPrintfHeaderSM10 *)(void *)globalPrintfBuffer;
+        tophdr->thread_buf_len = thread_buf_len;
+    }
+
+    // Adjust the offset by the right amount, and wrap it if need be
+    unsigned int offset = hdr.offset + CUPRINTF_MAX_LEN;
+    if(offset >= hdr.thread_buf_len)
+        offset = CUPRINTF_MAX_LEN;
+
+    // Write back the new offset for next time and return a pointer to it
+    ((cuPrintfHeaderSM10 *)(void *)myPrintfBuffer)->offset = offset;
+    return myPrintfBuffer + offset;
+#else
+    // Much easier with an atomic operation!
+    size_t offset = atomicAdd((unsigned int *)&printfBufferPtr, CUPRINTF_MAX_LEN) - (size_t)globalPrintfBuffer;
+    offset %= printfBufferLength;
+    return globalPrintfBuffer + offset;
+#endif
+}
+
+
+//
+//  writePrintfHeader
+//
+//  Inserts the header for containing our UID, fmt position and
+//  block/thread number. We generate it dynamically to avoid
+//	issues arising from requiring pre-initialisation.
+//
+__device__ static void writePrintfHeader(char *ptr, char *fmtptr)
+{
+    if(ptr)
+    {
+        cuPrintfHeader header;
+        header.magic = CUPRINTF_SM11_MAGIC;
+        header.fmtoffset = (unsigned short)(fmtptr - ptr);
+        header.blockid = blockIdx.x + gridDim.x*blockIdx.y;
+        header.threadid = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z;
+        *(cuPrintfHeader *)(void *)ptr = header;
+    }
+}
+
+
+//
+//  cuPrintfStrncpy
+//
+//  This special strncpy outputs an aligned length value, followed by the
+//  string. It then zero-pads the rest of the string until a 64-aligned
+//  boundary. The length *includes* the padding. A pointer to the byte
+//  just after the \0 is returned.
+//
+//  This function could overflow CUPRINTF_MAX_LEN characters in our buffer.
+//  To avoid it, we must count as we output and truncate where necessary.
+//
+__device__ static char *cuPrintfStrncpy(char *dest, const char *src, int n, char *end)
+{
+    // Initialisation and overflow check
+    if(!dest || !src || (dest >= end))
+        return NULL;
+
+    // Prepare to write the length specifier. We're guaranteed to have
+    // at least "CUPRINTF_ALIGN_SIZE" bytes left because we only write out in
+    // chunks that size, and CUPRINTF_MAX_LEN is aligned with CUPRINTF_ALIGN_SIZE.
+    int *lenptr = (int *)(void *)dest;
+    int len = 0;
+    dest += CUPRINTF_ALIGN_SIZE;
+
+    // Now copy the string
+    while(n--)
+    {
+        if(dest >= end)     // Overflow check
+            break;
+
+        len++;
+        *dest++ = *src;
+        if(*src++ == '\0')
+            break;
+    }
+
+    // Now write out the padding bytes, and we have our length.
+    while((dest < end) && (((long)dest & (CUPRINTF_ALIGN_SIZE-1)) != 0))
+    {
+        len++;
+        *dest++ = 0;
+    }
+    *lenptr = len;
+    return (dest < end) ? dest : NULL;        // Overflow means return NULL
+}
+
+
+//
+//  copyArg
+//
+//  This copies a length specifier and then the argument out to the
+//  data buffer. Templates let the compiler figure all this out at
+//  compile-time, making life much simpler from the programming
+//  point of view. I'm assuimg all (const char *) is a string, and
+//  everything else is the variable it points at. I'd love to see
+//  a better way of doing it, but aside from parsing the format
+//  string I can't think of one.
+//
+//  The length of the data type is inserted at the beginning (so that
+//  the display can distinguish between float and double), and the
+//  pointer to the end of the entry is returned.
+//
+__device__ static char *copyArg(char *ptr, const char *arg, char *end)
+{
+    // Initialisation check
+    if(!ptr || !arg)
+        return NULL;
+
+    // strncpy does all our work. We just terminate.
+    if((ptr = cuPrintfStrncpy(ptr, arg, CUPRINTF_MAX_LEN, end)) != NULL)
+        *ptr = 0;
+
+    return ptr;
+}
+
+template <typename T>
+__device__ static char *copyArg(char *ptr, T &arg, char *end)
+{
+    // Initisalisation and overflow check. Alignment rules mean that
+    // we're at least CUPRINTF_ALIGN_SIZE away from "end", so we only need
+    // to check that one offset.
+    if(!ptr || ((ptr+CUPRINTF_ALIGN_SIZE) >= end))
+        return NULL;
+
+    // Write the length and argument
+    *(int *)(void *)ptr = sizeof(arg);
+    ptr += CUPRINTF_ALIGN_SIZE;
+    *(T *)(void *)ptr = arg;
+    ptr += CUPRINTF_ALIGN_SIZE;
+    *ptr = 0;
+
+    return ptr;
+}
+
+
+//
+//  cuPrintf
+//
+//  Templated printf functions to handle multiple arguments.
+//  Note we return the total amount of data copied, not the number
+//  of characters output. But then again, who ever looks at the
+//  return from printf() anyway?
+//
+//  The format is to grab a block of circular buffer space, the
+//  start of which will hold a header and a pointer to the format
+//  string. We then write in all the arguments, and finally the
+//  format string itself. This is to make it easy to prevent
+//  overflow of our buffer (we support up to 10 arguments, each of
+//  which can be 12 bytes in length - that means that only the
+//  format string (or a %s) can actually overflow; so the overflow
+//  check need only be in the strcpy function.
+//
+//  The header is written at the very last because that's what
+//  makes it look like we're done.
+//
+//  Errors, which are basically lack-of-initialisation, are ignored
+//  in the called functions because NULL pointers are passed around
+//
+
+// All printf variants basically do the same thing, setting up the
+// buffer, writing all arguments, then finalising the header. For
+// clarity, we'll pack the code into some big macros.
+#define CUPRINTF_PREAMBLE \
+    char *start, *end, *bufptr, *fmtstart; \
+    if((start = getNextPrintfBufPtr()) == NULL) return 0; \
+    end = start + CUPRINTF_MAX_LEN; \
+    bufptr = start + sizeof(cuPrintfHeader);
+
+// Posting an argument is easy
+#define CUPRINTF_ARG(argname) \
+	bufptr = copyArg(bufptr, argname, end);
+
+// After args are done, record start-of-fmt and write the fmt and header
+#define CUPRINTF_POSTAMBLE \
+    fmtstart = bufptr; \
+    end = cuPrintfStrncpy(bufptr, fmt, CUPRINTF_MAX_LEN, end); \
+    writePrintfHeader(start, end ? fmtstart : NULL); \
+    return end ? (int)(end - start) : 0;
+
+__device__ int cuPrintf(const char *fmt)
+{
+	CUPRINTF_PREAMBLE;
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8)
+{
+	CUPRINTF_PREAMBLE;
+
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+	CUPRINTF_ARG(arg8);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+	CUPRINTF_ARG(arg8);
+	CUPRINTF_ARG(arg9);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+	CUPRINTF_ARG(arg8);
+	CUPRINTF_ARG(arg9);
+	CUPRINTF_ARG(arg10);
+
+	CUPRINTF_POSTAMBLE;
+}
+#undef CUPRINTF_PREAMBLE
+#undef CUPRINTF_ARG
+#undef CUPRINTF_POSTAMBLE
+
+
+//
+//	cuPrintfRestrict
+//
+//	Called to restrict output to a given thread/block.
+//	We store the info in "restrictRules", which is set up at
+//	init time by the host. It's not the cleanest way to do this
+//	because it means restrictions will last between
+//	invocations, but given the output-pointer continuity,
+//	I feel this is reasonable.
+//
+__device__ void cuPrintfRestrict(int threadid, int blockid)
+{
+    int thread_count = blockDim.x * blockDim.y * blockDim.z;
+	if(((threadid < thread_count) && (threadid >= 0)) || (threadid == CUPRINTF_UNRESTRICTED))
+		restrictRules.threadid = threadid;
+
+	int block_count = gridDim.x * gridDim.y;
+	if(((blockid < block_count) && (blockid >= 0)) || (blockid == CUPRINTF_UNRESTRICTED))
+		restrictRules.blockid = blockid;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// HOST SIDE
+
+#include <stdio.h>
+static FILE *printf_fp;
+
+static char *printfbuf_start=NULL;
+static char *printfbuf_device=NULL;
+static int printfbuf_len=0;
+
+
+//
+//  outputPrintfData
+//
+//  Our own internal function, which takes a pointer to a data buffer
+//  and passes it through libc's printf for output.
+//
+//  We receive the formate string and a pointer to where the data is
+//  held. We then run through and print it out.
+//
+//  Returns 0 on failure, 1 on success
+//
+static int outputPrintfData(char *fmt, char *data)
+{
+    // Format string is prefixed by a length that we don't need
+    fmt += CUPRINTF_ALIGN_SIZE;
+
+    // Now run through it, printing everything we can. We must
+    // run to every % character, extract only that, and use printf
+    // to format it.
+    char *p = strchr(fmt, '%');
+    while(p != NULL)
+    {
+        // Print up to the % character
+        *p = '\0';
+        fputs(fmt, printf_fp);
+        *p = '%';           // Put back the %
+
+        // Now handle the format specifier
+        char *format = p++;         // Points to the '%'
+        p += strcspn(p, "%cdiouxXeEfgGaAnps");
+        if(*p == '\0')              // If no format specifier, print the whole thing
+        {
+            fmt = format;
+            break;
+        }
+
+        // Cut out the format bit and use printf to print it. It's prefixed
+        // by its length.
+        int arglen = *(int *)data;
+        if(arglen > CUPRINTF_MAX_LEN)
+        {
+            fputs("Corrupt printf buffer data - aborting\n", printf_fp);
+            return 0;
+        }
+
+        data += CUPRINTF_ALIGN_SIZE;
+        
+        char specifier = *p++;
+        char c = *p;        // Store for later
+        *p = '\0';
+        switch(specifier)
+        {
+            // These all take integer arguments
+            case 'c':
+            case 'd':
+            case 'i':
+            case 'o':
+            case 'u':
+            case 'x':
+            case 'X':
+            case 'p':
+                fprintf(printf_fp, format, *((int *)data));
+                break;
+
+            // These all take double arguments
+            case 'e':
+            case 'E':
+            case 'f':
+            case 'g':
+            case 'G':
+            case 'a':
+            case 'A':
+                if(arglen == 4)     // Float vs. Double thing
+                    fprintf(printf_fp, format, *((float *)data));
+                else
+                    fprintf(printf_fp, format, *((double *)data));
+                break;
+
+            // Strings are handled in a special way
+            case 's':
+                fprintf(printf_fp, format, (char *)data);
+                break;
+
+            // % is special
+            case '%':
+                fprintf(printf_fp, "%%");
+                break;
+
+            // Everything else is just printed out as-is
+            default:
+                fprintf(printf_fp, format);
+                break;
+        }
+        data += CUPRINTF_ALIGN_SIZE;         // Move on to next argument
+        *p = c;                     // Restore what we removed
+        fmt = p;                    // Adjust fmt string to be past the specifier
+        p = strchr(fmt, '%');       // and get the next specifier
+    }
+
+    // Print out the last of the string
+    fputs(fmt, printf_fp);
+    return 1;
+}
+
+
+//
+//  doPrintfDisplay
+//
+//  This runs through the blocks of CUPRINTF_MAX_LEN-sized data, calling the
+//  print function above to display them. We've got this separate from
+//  cudaPrintfDisplay() below so we can handle the SM_10 architecture
+//  partitioning.
+//
+static int doPrintfDisplay(int headings, int clear, char *bufstart, char *bufend, char *bufptr, char *endptr)
+{
+    // Grab, piece-by-piece, each output element until we catch
+    // up with the circular buffer end pointer
+    int printf_count=0;
+    char printfbuf_local[CUPRINTF_MAX_LEN+1];
+    printfbuf_local[CUPRINTF_MAX_LEN] = '\0';
+
+    while(bufptr != endptr)
+    {
+        // Wrap ourselves at the end-of-buffer
+        if(bufptr == bufend)
+            bufptr = bufstart;
+
+        // Adjust our start pointer to within the circular buffer and copy a block.
+        cudaMemcpy(printfbuf_local, bufptr, CUPRINTF_MAX_LEN, cudaMemcpyDeviceToHost);
+
+        // If the magic number isn't valid, then this write hasn't gone through
+        // yet and we'll wait until it does (or we're past the end for non-async printfs).
+        cuPrintfHeader *hdr = (cuPrintfHeader *)printfbuf_local;
+        if((hdr->magic != CUPRINTF_SM11_MAGIC) || (hdr->fmtoffset >= CUPRINTF_MAX_LEN))
+        {
+            //fprintf(printf_fp, "Bad magic number in printf header\n");
+            break;
+        }
+
+        // Extract all the info and get this printf done
+        if(headings)
+            fprintf(printf_fp, "[%d, %d]: ", hdr->blockid, hdr->threadid);
+        if(hdr->fmtoffset == 0)
+            fprintf(printf_fp, "printf buffer overflow\n");
+        else if(!outputPrintfData(printfbuf_local+hdr->fmtoffset, printfbuf_local+sizeof(cuPrintfHeader)))
+            break;
+        printf_count++;
+
+        // Clear if asked
+        if(clear)
+            cudaMemset(bufptr, 0, CUPRINTF_MAX_LEN);
+
+        // Now advance our start location, because we're done, and keep copying
+        bufptr += CUPRINTF_MAX_LEN;
+    }
+
+    return printf_count;
+}
+
+
+//
+//  cudaPrintfInit
+//
+//  Takes a buffer length to allocate, creates the memory on the device and
+//  returns a pointer to it for when a kernel is called. It's up to the caller
+//  to free it.
+//
+extern "C" cudaError_t cudaPrintfInit(size_t bufferLen)
+{
+    // Fix up bufferlen to be a multiple of CUPRINTF_MAX_LEN
+    bufferLen = (bufferLen < CUPRINTF_MAX_LEN) ? CUPRINTF_MAX_LEN : bufferLen;
+    if((bufferLen % CUPRINTF_MAX_LEN) > 0)
+        bufferLen += (CUPRINTF_MAX_LEN - (bufferLen % CUPRINTF_MAX_LEN));
+    printfbuf_len = (int)bufferLen;
+
+    // Allocate a print buffer on the device and zero it
+    if(cudaMalloc((void **)&printfbuf_device, printfbuf_len) != cudaSuccess)
+		return cudaErrorInitializationError;
+    cudaMemset(printfbuf_device, 0, printfbuf_len);
+    printfbuf_start = printfbuf_device;         // Where we start reading from
+
+	// No restrictions to begin with
+	cuPrintfRestriction restrict;
+	restrict.threadid = restrict.blockid = CUPRINTF_UNRESTRICTED;
+	cudaMemcpyToSymbol(restrictRules, &restrict, sizeof(restrict));
+
+    // Initialise the buffer and the respective lengths/pointers.
+    cudaMemcpyToSymbol(globalPrintfBuffer, &printfbuf_device, sizeof(char *));
+    cudaMemcpyToSymbol(printfBufferPtr, &printfbuf_device, sizeof(char *));
+    cudaMemcpyToSymbol(printfBufferLength, &printfbuf_len, sizeof(printfbuf_len));
+
+    return cudaSuccess;
+}
+
+
+//
+//  cudaPrintfEnd
+//
+//  Frees up the memory which we allocated
+//
+extern "C" void cudaPrintfEnd()
+{
+    if(!printfbuf_start || !printfbuf_device)
+        return;
+
+    cudaFree(printfbuf_device);
+    printfbuf_start = printfbuf_device = NULL;
+}
+
+
+//
+//  cudaPrintfDisplay
+//
+//  Each call to this function dumps the entire current contents
+//	of the printf buffer to the pre-specified FILE pointer. The
+//	circular "start" pointer is advanced so that subsequent calls
+//	dumps only new stuff.
+//
+//  In the case of async memory access (via streams), call this
+//  repeatedly to keep trying to empty the buffer. If it's a sync
+//  access, then the whole buffer should empty in one go.
+//
+//	Arguments:
+//		outputFP     - File descriptor to output to (NULL => stdout)
+//		showThreadID - If true, prints [block,thread] before each line
+//
+extern "C" cudaError_t cudaPrintfDisplay(void *outputFP, bool showThreadID)
+{
+	printf_fp = (FILE *)((outputFP == NULL) ? stdout : outputFP);
+
+    // For now, we force "synchronous" mode which means we're not concurrent
+	// with kernel execution. This also means we don't need clearOnPrint.
+	// If you're patching it for async operation, here's where you want it.
+    bool sync_printfs = true;
+	bool clearOnPrint = false;
+
+    // Initialisation check
+    if(!printfbuf_start || !printfbuf_device || !printf_fp)
+        return cudaErrorMissingConfiguration;
+
+    // To determine which architecture we're using, we read the
+    // first short from the buffer - it'll be the magic number
+    // relating to the version.
+    unsigned short magic;
+    cudaMemcpy(&magic, printfbuf_device, sizeof(unsigned short), cudaMemcpyDeviceToHost);
+
+    // For SM_10 architecture, we've split our buffer into one-per-thread.
+    // That means we must do each thread block separately. It'll require
+    // extra reading. We also, for now, don't support async printfs because
+    // that requires tracking one start pointer per thread.
+    if(magic == CUPRINTF_SM10_MAGIC)
+    {
+        sync_printfs = true;
+	    clearOnPrint = false;
+        int blocklen = 0;
+        char *blockptr = printfbuf_device;
+        while(blockptr < (printfbuf_device + printfbuf_len))
+        {
+            cuPrintfHeaderSM10 hdr;
+            cudaMemcpy(&hdr, blockptr, sizeof(hdr), cudaMemcpyDeviceToHost);
+
+            // We get our block-size-step from the very first header
+            if(hdr.thread_buf_len != 0)
+                blocklen = hdr.thread_buf_len;
+
+            // No magic number means no printfs from this thread
+            if(hdr.magic != CUPRINTF_SM10_MAGIC)
+            {
+                if(blocklen == 0)
+                {
+                    fprintf(printf_fp, "No printf headers found at all!\n");
+                    break;                              // No valid headers!
+                }
+                blockptr += blocklen;
+                continue;
+            }
+
+            // "offset" is non-zero then we can print the block contents
+            if(hdr.offset > 0)
+            {
+                // For synchronous printfs, we must print from endptr->bufend, then from start->end
+                if(sync_printfs)
+                    doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+hdr.offset+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len);
+                doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.offset+CUPRINTF_MAX_LEN);
+            }
+
+            // Move on to the next block and loop again
+            blockptr += hdr.thread_buf_len;
+        }
+    }
+    // For SM_11 and up, everything is a single buffer and it's simple
+    else if(magic == CUPRINTF_SM11_MAGIC)
+    {
+	    // Grab the current "end of circular buffer" pointer.
+        char *printfbuf_end = NULL;
+        cudaMemcpyFromSymbol(&printfbuf_end, printfBufferPtr, sizeof(char *));
+
+        // Adjust our starting and ending pointers to within the block
+        char *bufptr = ((printfbuf_start - printfbuf_device) % printfbuf_len) + printfbuf_device;
+        char *endptr = ((printfbuf_end - printfbuf_device) % printfbuf_len) + printfbuf_device;
+
+        // For synchronous (i.e. after-kernel-exit) printf display, we have to handle circular
+        // buffer wrap carefully because we could miss those past "end".
+        if(sync_printfs)
+            doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, endptr, printfbuf_device+printfbuf_len);
+        doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, bufptr, endptr);
+
+        printfbuf_start = printfbuf_end;
+    }
+    else
+        ;//printf("Bad magic number in cuPrintf buffer header\n");
+
+    // If we were synchronous, then we must ensure that the memory is cleared on exit
+    // otherwise another kernel launch with a different grid size could conflict.
+    if(sync_printfs)
+        cudaMemset(printfbuf_device, 0, printfbuf_len);
+
+    return cudaSuccess;
+}
+
+// Cleanup
+#undef CUPRINTF_MAX_LEN
+#undef CUPRINTF_ALIGN_SIZE
+#undef CUPRINTF_SM10_MAGIC
+#undef CUPRINTF_SM11_MAGIC
+
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh
@ -0,0 +1,162 @@
+/*
+	Copyright 2009 NVIDIA Corporation.  All rights reserved.
+
+	NOTICE TO LICENSEE:   
+
+	This source code and/or documentation ("Licensed Deliverables") are subject 
+	to NVIDIA intellectual property rights under U.S. and international Copyright 
+	laws.  
+
+	These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL 
+	to NVIDIA and is being provided under the terms and conditions of a form of 
+	NVIDIA software license agreement by and between NVIDIA and Licensee ("License 
+	Agreement") or electronically accepted by Licensee.  Notwithstanding any terms 
+	or conditions to the contrary in the License Agreement, reproduction or 
+	disclosure of the Licensed Deliverables to any third party without the express 
+	written consent of NVIDIA is prohibited.     
+
+	NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, 
+	NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED 
+	DELIVERABLES FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED 
+	WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE 
+	LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
+	NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   NOTWITHSTANDING ANY 
+	TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL 
+	NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, 
+	OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,	WHETHER 
+	IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF 
+	OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.  
+
+	U.S. Government End Users. These Licensed Deliverables are a "commercial item" 
+	as that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
+	"commercial computer  software"  and "commercial computer software documentation" 
+	as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the 
+	U.S. Government only as a commercial end item.  Consistent with 48 C.F.R.12.212 
+	and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government 
+	End Users acquire the Licensed Deliverables with only those rights set forth 
+	herein. 
+
+	Any use of the Licensed Deliverables in individual and commercial software must 
+	include, in the user documentation and internal comments to the code, the above 
+	Disclaimer and U.S. Government End Users Notice.
+ */
+
+#ifndef CUPRINTF_H
+#define CUPRINTF_H
+
+/*
+ *	This is the header file supporting cuPrintf.cu and defining both
+ *	the host and device-side interfaces. See that file for some more
+ *	explanation and sample use code. See also below for details of the
+ *	host-side interfaces.
+ *
+ *  Quick sample code:
+ *
+	#include "cuPrintf.cu"
+ 	
+	__global__ void testKernel(int val)
+	{
+		cuPrintf("Value is: %d\n", val);
+	}
+
+	int main()
+	{
+		cudaPrintfInit();
+		testKernel<<< 2, 3 >>>(10);
+		cudaPrintfDisplay(stdout, true);
+		cudaPrintfEnd();
+        return 0;
+	}
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+// DEVICE SIDE
+// External function definitions for device-side code
+
+// Abuse of templates to simulate varargs
+__device__ int cuPrintf(const char *fmt);
+template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
+template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
+template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
+template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
+template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);
+
+
+//
+//	cuPrintfRestrict
+//
+//	Called to restrict output to a given thread/block. Pass
+//	the constant CUPRINTF_UNRESTRICTED to unrestrict output
+//	for thread/block IDs. Note you can therefore allow
+//	"all printfs from block 3" or "printfs from thread 2
+//	on all blocks", or "printfs only from block 1, thread 5".
+//
+//	Arguments:
+//		threadid - Thread ID to allow printfs from
+//		blockid - Block ID to allow printfs from
+//
+//	NOTE: Restrictions last between invocations of
+//	kernels unless cudaPrintfInit() is called again.
+//
+#define CUPRINTF_UNRESTRICTED	-1
+__device__ void cuPrintfRestrict(int threadid, int blockid);
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// HOST SIDE
+// External function definitions for host-side code
+
+//
+//	cudaPrintfInit
+//
+//	Call this once to initialise the printf system. If the output
+//	file or buffer size needs to be changed, call cudaPrintfEnd()
+//	before re-calling cudaPrintfInit().
+//
+//	The default size for the buffer is 1 megabyte. For CUDA
+//	architecture 1.1 and above, the buffer is filled linearly and
+//	is completely used;	however for architecture 1.0, the buffer
+//	is divided into as many segments are there are threads, even
+//	if some threads do not call cuPrintf().
+//
+//	Arguments:
+//		bufferLen - Length, in bytes, of total space to reserve
+//		            (in device global memory) for output.
+//
+//	Returns:
+//		cudaSuccess if all is well.
+//
+extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576);   // 1-meg - that's enough for 4096 printfs by all threads put together
+
+//
+//	cudaPrintfEnd
+//
+//	Cleans up all memories allocated by cudaPrintfInit().
+//	Call this at exit, or before calling cudaPrintfInit() again.
+//
+extern "C" void cudaPrintfEnd();
+
+//
+//	cudaPrintfDisplay
+//
+//	Dumps the contents of the output buffer to the specified
+//	file pointer. If the output pointer is not specified,
+//	the default "stdout" is used.
+//
+//	Arguments:
+//		outputFP     - A file pointer to an output stream.
+//		showThreadID - If "true", output strings are prefixed
+//		               by "[blockid, threadid] " at output.
+//
+//	Returns:
+//		cudaSuccess if all is well.
+//
+extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);
+
+#endif  // CUPRINTF_H
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu
@ -0,0 +1,27 @@
+#include "cuPrintf.cu"
+#include <stdio.h>
+
+__global__ void device_greetings(void)
+{
+	cuPrintf("Hello, world from the device!\n");
+}
+
+int main(void)
+{
+	// greet from the host
+	printf("Hello, world from the host!\n");
+
+	// initialize cuPrintf
+	cudaPrintfInit();
+
+	// launch a kernel with a single thread to greet from the device
+	device_greetings<<<1,1>>>();
+
+	// display the device's greeting
+	cudaPrintfDisplay();
+
+	// clean up after cuPrintf
+	cudaPrintfEnd();
+
+	return 0;
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt
@ -0,0 +1,33 @@
+set(LIBRARY ethash)
+set(CMAKE_BUILD_TYPE Release)
+
+if (NOT MSVC)
+	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
+endif()
+
+set(FILES 	util.c
+          	util.h
+          	internal.c
+          	ethash.h
+          	endian.h
+          	compiler.h
+          	fnv.h
+          	data_sizes.h)
+
+if (NOT CRYPTOPP_FOUND)
+	find_package(CryptoPP 5.6.2)
+endif()
+
+if (CRYPTOPP_FOUND)
+	add_definitions(-DWITH_CRYPTOPP)
+	include_directories( ${CRYPTOPP_INCLUDE_DIRS} )
+	list(APPEND FILES sha3_cryptopp.cpp sha3_cryptopp.h)
+else()
+	list(APPEND FILES sha3.c sha3.h)
+endif()
+
+add_library(${LIBRARY} ${FILES})
+
+if (CRYPTOPP_FOUND)
+	TARGET_LINK_LIBRARIES(${LIBRARY} ${CRYPTOPP_LIBRARIES})
+endif()
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h
@ -0,0 +1,33 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file compiler.h
+ * @date 2014
+ */
+#pragma once
+
+// Visual Studio doesn't support the inline keyword in C mode
+#if defined(_MSC_VER) && !defined(__cplusplus)
+#define inline __inline
+#endif
+
+// pretend restrict is a standard keyword
+#if defined(_MSC_VER)
+#define restrict __restrict
+#else
+#define restrict __restrict__
+#endif
+
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h
@ -0,0 +1,248 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software FoundationUUU,either version 3 of the LicenseUUU,or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be usefulU,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If notUUU,see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file nth_prime.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+// TODO: Update this after ~7 years
+
+#pragma once
+
+#include <stdint.h>
+//#include <Security/Security.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+// 500 Epochs worth of tabulated DAG sizes (~3.5 Years)
+
+// Generated with the following Mathematica Code:
+// GetDataSizes[n_] := Module[{
+//        DAGSizeBytesInit = 2^30,
+//        MixBytes = 128,
+//        DAGGrowth = 113000000,
+//        j = 0},
+//        Reap[
+//          While[j < n,
+//            Module[{i =
+//              Floor[(DAGSizeBytesInit + DAGGrowth * j) / MixBytes]},
+//              While[! PrimeQ[i], i--];
+//              Sow[i*MixBytes]; j++]]]][[2]][[1]]
+
+static const size_t dag_sizes[] = {
+        1073739904U, 1186739584U, 1299741568U, 1412741248U, 1525741696U,
+        1638736768U, 1751741312U, 1864740736U, 1977740672U, 2090740864U,
+        2203740544U, 2316741248U, 2429739392U, 2542740352U, 2655741824U,
+        2768739712U, 2881740416U, 2994741632U, 3107740544U, 3220741504U,
+        3333738112U, 3446741632U, 3559741312U, 3672740224U, 3785740928U,
+        3898738304U, 4011741824U, 4124739712U, 4237735808U, 4350740864U,
+        4463741824U, 4576741504U, 4689741184U, 4802739328U, 4915741568U,
+        5028740224U, 5141740672U, 5254738304U, 5367741824U, 5480737664U,
+        5593738112U, 5706741632U, 5819740544U, 5932734592U, 6045739904U,
+        6158740096U, 6271740032U, 6384731776U, 6497732992U, 6610740352U,
+        6723741056U, 6836741504U, 6949740416U, 7062740096U, 7175741824U,
+        7288740224U, 7401741184U, 7514741632U, 7627741568U, 7740739712U,
+        7853739136U, 7966740352U, 8079741568U, 8192739712U, 8305738624U,
+        8418740864U, 8531740288U, 8644740736U, 8757735808U, 8870738816U,
+        8983739264U, 9096740992U, 9209740928U, 9322739584U, 9435741824U,
+        9548741504U, 9661739392U, 9774738304U, 9887741312U, 10000738688U,
+        10113739136U, 10226741632U, 10339739776U, 10452741248U, 10565740928U,
+        10678736512U, 10791734656U, 10904741248U, 11017738112U, 11130741632U,
+        11243741312U, 11356739456U, 11469740416U, 11582734976U, 11695739008U,
+        11808741248U, 11921734784U, 12034739072U, 12147741568U, 12260737408U,
+        12373741696U, 12486738304U, 12599740544U, 12712740224U, 12825741184U,
+        12938736256U, 13051741312U, 13164737408U, 13277738368U, 13390738048U,
+        13503741824U, 13616741504U, 13729737088U, 13842740096U, 13955741312U,
+        14068741504U, 14181740416U, 14294741632U, 14407739776U, 14520740224U,
+        14633740928U, 14746736512U, 14859741824U, 14972740736U, 15085740928U,
+        15198738304U, 15311732096U, 15424740736U, 15537739904U, 15650741632U,
+        15763741568U, 15876737152U, 15989741696U, 16102740608U, 16215741056U,
+        16328741248U, 16441740416U, 16554737792U, 16667740288U, 16780740992U,
+        16893738112U, 17006741632U, 17119739008U, 17232735616U, 17345739392U,
+        17458740352U, 17571736192U, 17684739712U, 17797739392U, 17910740096U,
+        18023741312U, 18136740736U, 18249738112U, 18362738816U, 18475735424U,
+        18588740224U, 18701738368U, 18814736768U, 18927737216U, 19040739968U,
+        19153739648U, 19266736768U, 19379737984U, 19492739456U, 19605738368U,
+        19718740352U, 19831741312U, 19944736384U, 20057741696U, 20170741376U,
+        20283741824U, 20396737408U, 20509741696U, 20622741376U, 20735739008U,
+        20848741504U, 20961740672U, 21074739328U, 21187740032U, 21300739456U,
+        21413741696U, 21526740608U, 21639741824U, 21752737408U, 21865741696U,
+        21978741376U, 22091741824U, 22204738432U, 22317740672U, 22430740096U,
+        22543736704U, 22656741248U, 22769739904U, 22882739584U, 22995740288U,
+        23108740736U, 23221740928U, 23334741376U, 23447737216U, 23560740992U,
+        23673741184U, 23786740864U, 23899737728U, 24012741248U, 24125734784U,
+        24238736512U, 24351741824U, 24464740736U, 24577737088U, 24690741632U,
+        24803739776U, 24916740736U, 25029740416U, 25142740864U, 25255741568U,
+        25368741248U, 25481740672U, 25594741376U, 25707741568U, 25820741504U,
+        25933730432U, 26046739072U, 26159741824U, 26272741504U, 26385740672U,
+        26498740096U, 26611741568U, 26724740992U, 26837739904U, 26950735232U,
+        27063738496U, 27176741248U, 27289741184U, 27402740864U, 27515740544U,
+        27628737152U, 27741740672U, 27854741632U, 27967740544U, 28080739712U,
+        28193738368U, 28306741376U, 28419737728U, 28532739968U, 28645739648U,
+        28758740096U, 28871741312U, 28984739456U, 29097740416U, 29210740864U,
+        29323741312U, 29436740224U, 29549741696U, 29662738304U, 29775741568U,
+        29888741504U, 30001740928U, 30114737024U, 30227735168U, 30340737664U,
+        30453738368U, 30566737024U, 30679733632U, 30792740224U, 30905740928U,
+        31018740352U, 31131740032U, 31244738944U, 31357737344U, 31470741376U,
+        31583740544U, 31696740224U, 31809738112U, 31922739328U, 32035737472U,
+        32148740992U, 32261741696U, 32374740352U, 32487741824U, 32600740736U,
+        32713739648U, 32826740608U, 32939729792U, 33052740992U, 33165740672U,
+        33278739584U, 33391741312U, 33504739712U, 33617740928U, 33730740608U,
+        33843738496U, 33956739968U, 34069741696U, 34182739328U, 34295741824U,
+        34408739968U, 34521740672U, 34634736512U, 34747741568U, 34860741248U,
+        34973739392U, 35086738304U, 35199741056U, 35312736896U, 35425741184U,
+        35538741376U, 35651740288U, 35764737152U, 35877741184U, 35990739584U,
+        36103740544U, 36216740992U, 36329739392U, 36442737536U, 36555741568U,
+        36668740736U, 36781741184U, 36894737024U, 37007741312U, 37120739456U,
+        37233741184U, 37346736256U, 37459736192U, 37572734336U, 37685739904U,
+        37798740352U, 37911737728U, 38024741504U, 38137739648U, 38250740608U,
+        38363741824U, 38476740992U, 38589741184U, 38702740096U, 38815741312U,
+        38928741248U, 39041738368U, 39154739584U, 39267741824U, 39380739712U,
+        39493735808U, 39606741632U, 39719741312U, 39832741504U, 39945739648U,
+        40058740352U, 40171740032U, 40284740992U, 40397740672U, 40510740352U,
+        40623740288U, 40736738176U, 40849737856U, 40962741376U, 41075739776U,
+        41188737664U, 41301735808U, 41414738048U, 41527741312U, 41640740992U,
+        41753739904U, 41866739072U, 41979738496U, 42092740736U, 42205739648U,
+        42318740608U, 42431741312U, 42544738688U, 42657741184U, 42770738048U,
+        42883741568U, 42996741248U, 43109740928U, 43222736512U, 43335741056U,
+        43448730496U, 43561740416U, 43674741632U, 43787740544U, 43900741504U,
+        44013739648U, 44126740864U, 44239740544U, 44352741248U, 44465738368U,
+        44578735232U, 44691739264U, 44804741504U, 44917741696U, 45030741376U,
+        45143741824U, 45256740992U, 45369739136U, 45482740096U, 45595739776U,
+        45708739712U, 45821740672U, 45934741376U, 46047741056U, 46160741248U,
+        46273737088U, 46386740864U, 46499739008U, 46612739968U, 46725735296U,
+        46838740864U, 46951741568U, 47064737152U, 47177741696U, 47290741376U,
+        47403738752U, 47516741248U, 47629739648U, 47742741632U, 47855737984U,
+        47968740224U, 48081738368U, 48194741632U, 48307739264U, 48420739712U,
+        48533739136U, 48646738304U, 48759741824U, 48872741504U, 48985739392U,
+        49098741376U, 49211741056U, 49324740992U, 49437738368U, 49550740864U,
+        49663735424U, 49776737408U, 49889740672U, 50002738816U, 50115738752U,
+        50228739712U, 50341741696U, 50454736768U, 50567738752U, 50680739968U,
+        50793736832U, 50906734976U, 51019741568U, 51132739456U, 51245741696U,
+        51358741376U, 51471741056U, 51584738944U, 51697734272U, 51810739072U,
+        51923736448U, 52036740736U, 52149741184U, 52262737024U, 52375738496U,
+        52488740992U, 52601739136U, 52714740352U, 52827736448U, 52940738176U,
+        53053741696U, 53166740864U, 53279741824U, 53392741504U, 53505739136U,
+        53618739584U, 53731741312U, 53844741248U, 53957741696U, 54070741376U,
+        54183740288U, 54296741504U, 54409741696U, 54522739072U, 54635737472U,
+        54748741504U, 54861736064U, 54974740096U, 55087741568U, 55200733568U,
+        55313741696U, 55426734464U, 55539741056U, 55652741504U, 55765741184U,
+        55878741376U, 55991730304U, 56104740992U, 56217740672U, 56330731648U,
+        56443737472U, 56556724352U, 56669740672U, 56782739072U, 56895740032U,
+        57008741248U, 57121741696U, 57234740096U, 57347741312U, 57460741504U
+};
+
+// 500 Epochs worth of tabulated DAG sizes (~3.5 Years)
+
+// Generated with the following Mathematica Code:
+// GetCacheSizes[n_] := Module[{
+//        DAGSizeBytesInit = 2^30,
+//        MixBytes = 128,
+//        DAGGrowth = 113000000,
+//        HashBytes = 64,
+//        DAGParents = 1024,
+//        j = 0},
+//    Reap[
+//      While[j < n,
+//       Module[{i = Floor[(DAGSizeBytesInit + DAGGrowth * j) / (DAGParents * HashBytes)]},
+//        While[! PrimeQ[i], i--];
+//        Sow[i*HashBytes]; j++]]]][[2]][[1]]
+
+const size_t cache_sizes[] = {
+        1048384U, 1158208U, 1268416U, 1377856U, 1489856U, 1599296U, 1710656U,
+        1820608U, 1930816U, 2041024U, 2151872U, 2261696U, 2371904U, 2482624U,
+        2593216U, 2703296U, 2814016U, 2924224U, 3034816U, 3144896U, 3255488U,
+        3365312U, 3475904U, 3586624U, 3696064U, 3806272U, 3917504U, 4027456U,
+        4138304U, 4248512U, 4359104U, 4469312U, 4579264U, 4689728U, 4797376U,
+        4909888U, 5020096U, 5131328U, 5241664U, 5351744U, 5461312U, 5572544U,
+        5683264U, 5793472U, 5903552U, 6014144U, 6121664U, 6235072U, 6344896U,
+        6454592U, 6565952U, 6675904U, 6786112U, 6896704U, 7006784U, 7117888U,
+        7228096U, 7338304U, 7448768U, 7557952U, 7669184U, 7779776U, 7889216U,
+        8000192U, 8110912U, 8220736U, 8331712U, 8441536U, 8552384U, 8662592U,
+        8772928U, 8883136U, 8993728U, 9103168U, 9214528U, 9323968U, 9434816U,
+        9545152U, 9655616U, 9766336U, 9876544U, 9986624U, 10097344U, 10207424U,
+        10316864U, 10427968U, 10538432U, 10649152U, 10758976U, 10869568U, 10979776U,
+        11089472U, 11200832U, 11309632U, 11420608U, 11531584U, 11641792U, 11751104U,
+        11862976U, 11973184U, 12083264U, 12193856U, 12304064U, 12414656U, 12524608U,
+        12635072U, 12745792U, 12855616U, 12965824U, 13076416U, 13187008U, 13297216U,
+        13407808U, 13518016U, 13627072U, 13738688U, 13848256U, 13959488U, 14069696U,
+        14180288U, 14290624U, 14399552U, 14511424U, 14621504U, 14732096U, 14841664U,
+        14951744U, 15062336U, 15172672U, 15283264U, 15393088U, 15504448U, 15614272U,
+        15723712U, 15834944U, 15945152U, 16055744U, 16165696U, 16277056U, 16387136U,
+        16494784U, 16607936U, 16718272U, 16828736U, 16938176U, 17048384U, 17159872U,
+        17266624U, 17380544U, 17490496U, 17600192U, 17711296U, 17821376U, 17931968U,
+        18041152U, 18152896U, 18261952U, 18373568U, 18483392U, 18594112U, 18703936U,
+        18814912U, 18924992U, 19034944U, 19145408U, 19256128U, 19366208U, 19477184U,
+        19587136U, 19696576U, 19808192U, 19916992U, 20028352U, 20137664U, 20249024U,
+        20358848U, 20470336U, 20580544U, 20689472U, 20801344U, 20911424U, 21020096U,
+        21130688U, 21242176U, 21352384U, 21462208U, 21573824U, 21683392U, 21794624U,
+        21904448U, 22013632U, 22125248U, 22235968U, 22344512U, 22456768U, 22566848U,
+        22677056U, 22786496U, 22897984U, 23008064U, 23118272U, 23228992U, 23338816U,
+        23449408U, 23560256U, 23670464U, 23780672U, 23891264U, 24001216U, 24110656U,
+        24221888U, 24332608U, 24442688U, 24552512U, 24662464U, 24773696U, 24884032U,
+        24994496U, 25105216U, 25215296U, 25324864U, 25435712U, 25546432U, 25655744U,
+        25767232U, 25876672U, 25986368U, 26098112U, 26207936U, 26318912U, 26428736U,
+        26539712U, 26650048U, 26760256U, 26869184U, 26979776U, 27091136U, 27201728U,
+        27311552U, 27422272U, 27532352U, 27642304U, 27752896U, 27863744U, 27973952U,
+        28082752U, 28194752U, 28305344U, 28415168U, 28524992U, 28636352U, 28746304U,
+        28857152U, 28967104U, 29077184U, 29187904U, 29298496U, 29408576U, 29518912U,
+        29628992U, 29739968U, 29850176U, 29960512U, 30070336U, 30180544U, 30290752U,
+        30398912U, 30512192U, 30622784U, 30732992U, 30842176U, 30953536U, 31063744U,
+        31174336U, 31284544U, 31395136U, 31504448U, 31615552U, 31725632U, 31835072U,
+        31946176U, 32057024U, 32167232U, 32277568U, 32387008U, 32497984U, 32608832U,
+        32719168U, 32829376U, 32939584U, 33050048U, 33160768U, 33271232U, 33381184U,
+        33491648U, 33601856U, 33712576U, 33822016U, 33932992U, 34042816U, 34153024U,
+        34263104U, 34373824U, 34485056U, 34594624U, 34704832U, 34816064U, 34926272U,
+        35036224U, 35146816U, 35255104U, 35367104U, 35478208U, 35588416U, 35698496U,
+        35808832U, 35918656U, 36029888U, 36139456U, 36250688U, 36360512U, 36471104U,
+        36581696U, 36691136U, 36802112U, 36912448U, 37022912U, 37132864U, 37242944U,
+        37354048U, 37464512U, 37574848U, 37684928U, 37794752U, 37904704U, 38015552U,
+        38125888U, 38236864U, 38345792U, 38457152U, 38567744U, 38678336U, 38787776U,
+        38897216U, 39009088U, 39117632U, 39230144U, 39340352U, 39450304U, 39560384U,
+        39671488U, 39781312U, 39891392U, 40002112U, 40112704U, 40223168U, 40332608U,
+        40443968U, 40553792U, 40664768U, 40774208U, 40884416U, 40993984U, 41105984U,
+        41215424U, 41326528U, 41436992U, 41546048U, 41655872U, 41768128U, 41878336U,
+        41988928U, 42098752U, 42209344U, 42319168U, 42429248U, 42540352U, 42649792U,
+        42761024U, 42871616U, 42981824U, 43092032U, 43201856U, 43312832U, 43423552U,
+        43533632U, 43643584U, 43753792U, 43864384U, 43974976U, 44084032U, 44195392U,
+        44306368U, 44415296U, 44526016U, 44637248U, 44746816U, 44858048U, 44967872U,
+        45078848U, 45188288U, 45299264U, 45409216U, 45518272U, 45630272U, 45740224U,
+        45850432U, 45960896U, 46069696U, 46182208U, 46292416U, 46402624U, 46512064U,
+        46623296U, 46733888U, 46843712U, 46953664U, 47065024U, 47175104U, 47285696U,
+        47395904U, 47506496U, 47615296U, 47726912U, 47837632U, 47947712U, 48055232U,
+        48168128U, 48277952U, 48387392U, 48499648U, 48609472U, 48720064U, 48830272U,
+        48940096U, 49050944U, 49160896U, 49271744U, 49381568U, 49492288U, 49602752U,
+        49712576U, 49822016U, 49934272U, 50042816U, 50154304U, 50264128U, 50374336U,
+        50484416U, 50596288U, 50706752U, 50816704U, 50927168U, 51035456U, 51146944U,
+        51258176U, 51366976U, 51477824U, 51589568U, 51699776U, 51809728U, 51920576U,
+        52030016U, 52140736U, 52251328U, 52361152U, 52470592U, 52582592U, 52691776U,
+        52803136U, 52912576U, 53020736U, 53132224U, 53242688U, 53354816U, 53465536U,
+        53575232U, 53685568U, 53796544U, 53906752U, 54016832U, 54126656U, 54236992U,
+        54347456U, 54457408U, 54569024U, 54679232U, 54789184U, 54899776U, 55008832U,
+        55119296U, 55231168U, 55341248U, 55451584U, 55562048U, 55672256U, 55782208U,
+        55893184U, 56002112U, 56113216U
+};
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h
@ -0,0 +1,74 @@
+#pragma once
+
+#include <stdint.h>
+#include "compiler.h"
+
+static const uint8_t BitReverseTable256[] =
+        {
+                0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+                0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+                0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+                0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+                0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+                0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+                0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+                0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+                0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+                0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+                0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+                0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+                0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+                0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+                0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+                0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+        };
+
+static inline uint32_t bitfn_swap32(uint32_t a) {
+    return (BitReverseTable256[a & 0xff] << 24) |
+            (BitReverseTable256[(a >> 8) & 0xff] << 16) |
+            (BitReverseTable256[(a >> 16) & 0xff] << 8) |
+            (BitReverseTable256[(a >> 24) & 0xff]);
+}
+
+static inline uint64_t bitfn_swap64(uint64_t a) {
+    return ((uint64_t) bitfn_swap32((uint32_t) (a >> 32))) |
+            (((uint64_t) bitfn_swap32((uint32_t) a)) << 32);
+}
+
+#if defined(__MINGW32__) || defined(_WIN32)
+  # define LITTLE_ENDIAN 1234
+  # define BYTE_ORDER    LITTLE_ENDIAN
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
+  # include <sys/endian.h>
+#elif defined(__OpenBSD__) || defined(__SVR4)
+  # include <sys/types.h>
+#elif defined(__APPLE__)
+# include <machine/endian.h>
+#elif defined( BSD ) && (BSD >= 199103)
+  # include <machine/endian.h>
+#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ )
+  # define LITTLE_ENDIAN 1234
+  # define BYTE_ORDER    LITTLE_ENDIAN
+#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ )
+  # define BIG_ENDIAN 1234
+  # define BYTE_ORDER    BIG_ENDIAN
+#else
+
+# include <endian.h>
+
+#endif
+
+
+#if LITTLE_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(x) (x)
+#define fix_endian64(x) (x)
+
+#elif BIG_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(x) bitfn_swap32(x)
+#define fix_endian64(x) bitfn_swap64(x)
+
+#else
+# error "endian not supported"
+#endif // BYTE_ORDER
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h
@ -0,0 +1,88 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file ethash.h
+* @date 2015
+*/
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stddef.h>
+#include "compiler.h"
+
+#define REVISION 18
+#define DAGSIZE_BYTES_INIT 1073741824U
+#define DAG_GROWTH 113000000U
+#define EPOCH_LENGTH 30000U
+#define MIX_BYTES 128
+#define DAG_PARENTS 256
+#define CACHE_ROUNDS 3
+#define ACCESSES 64
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ethash_params {
+    size_t full_size;               // Size of full data set (in bytes, multiple of mix size (128)).
+    size_t cache_size;              // Size of compute cache (in bytes, multiple of node size (64)).
+} ethash_params;
+
+typedef struct ethash_return_value {
+    uint8_t result[32];
+    uint8_t mix_hash[32];
+} ethash_return_value;
+
+size_t const ethash_get_datasize(const uint32_t block_number);
+size_t const ethash_get_cachesize(const uint32_t block_number);
+
+// initialize the parameters
+static inline void ethash_params_init(ethash_params *params, const uint32_t block_number) {
+    params->full_size = ethash_get_datasize(block_number);
+    params->cache_size = ethash_get_cachesize(block_number);
+}
+
+typedef struct ethash_cache {
+    void *mem;
+} ethash_cache;
+
+void ethash_mkcache(ethash_cache *cache, ethash_params const *params, const uint8_t seed[32]);
+void ethash_compute_full_data(void *mem, ethash_params const *params, ethash_cache const *cache);
+void ethash_full(ethash_return_value *ret, void const *full_mem, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce);
+void ethash_light(ethash_return_value *ret, ethash_cache const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce);
+
+static inline int ethash_check_difficulty(
+        const uint8_t hash[32],
+        const uint8_t difficulty[32]) {
+    // Difficulty is big endian
+    for (int i = 0; i < 32; i++) {
+        if (hash[i] == difficulty[i]) continue;
+        return hash[i] < difficulty[i];
+    }
+    return 0;
+}
+
+int ethash_quick_check_difficulty(
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32],
+        const uint8_t difficulty[32]);
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h
@ -0,0 +1,38 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file fnv.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+#pragma once
+#include <stdint.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FNV_PRIME 0x01000193
+
+static inline uint32_t fnv_hash(const uint32_t x, const uint32_t y) {
+	return x*FNV_PRIME ^ y;
+}
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c
@ -0,0 +1,297 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file dash.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @author Matthew Wampler-Doty
+* @date 2015
+*/
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include "ethash.h"
+#include "fnv.h"
+#include "endian.h"
+#include "internal.h"
+#include "data_sizes.h"
+
+#ifdef WITH_CRYPTOPP
+
+#include "SHA3_cryptopp.h"
+
+#else
+#include "sha3.h"
+#endif // WITH_CRYPTOPP
+
+size_t const ethash_get_datasize(const uint32_t block_number) {
+    assert(block_number / EPOCH_LENGTH < 500);
+    return dag_sizes[block_number / EPOCH_LENGTH];
+}
+
+size_t const ethash_get_cachesize(const uint32_t block_number) {
+    assert(block_number / EPOCH_LENGTH < 500);
+    return cache_sizes[block_number / EPOCH_LENGTH];
+}
+
+// Follows Sergio's "STRICT MEMORY HARD HASHING FUNCTIONS" (2014)
+// https://bitslog.files.wordpress.com/2013/12/memohash-v0-3.pdf
+// SeqMemoHash(s, R, N)
+void static ethash_compute_cache_nodes(
+        node *const nodes,
+        ethash_params const *params,
+        const uint8_t seed[32]) {
+    assert((params->cache_size % sizeof(node)) == 0);
+    uint32_t const num_nodes = (uint32_t)(params->cache_size / sizeof(node));
+
+    SHA3_512(nodes[0].bytes, seed, 32);
+
+    for (unsigned i = 1; i != num_nodes; ++i) {
+        SHA3_512(nodes[i].bytes, nodes[i - 1].bytes, 64);
+    }
+
+    for (unsigned j = 0; j != CACHE_ROUNDS; j++) {
+        for (unsigned i = 0; i != num_nodes; i++) {
+            uint32_t const idx = nodes[i].words[0] % num_nodes;
+            node data;
+            data = nodes[(num_nodes - 1 + i) % num_nodes];
+			for (unsigned w = 0; w != NODE_WORDS; ++w)
+			{
+				data.words[w] ^= nodes[idx].words[w];
+			}
+            SHA3_512(nodes[i].bytes, data.bytes, sizeof(data));
+        }
+    }
+
+    // now perform endian conversion
+#if BYTE_ORDER != LITTLE_ENDIAN
+    for (unsigned w = 0; w != (num_nodes*NODE_WORDS); ++w)
+    {
+        nodes->words[w] = fix_endian32(nodes->words[w]);
+    }
+#endif
+}
+
+void ethash_mkcache(
+  ethash_cache *cache,
+        ethash_params const *params,
+        const uint8_t seed[32]) {
+    node *nodes = (node *) cache->mem;
+    ethash_compute_cache_nodes(nodes, params, seed);
+}
+
+void ethash_calculate_dag_item(
+        node *const ret,
+        const unsigned node_index,
+        const struct ethash_params *params,
+        const struct ethash_cache *cache) {
+
+    uint32_t num_parent_nodes = (uint32_t)(params->cache_size / sizeof(node));
+    node const *cache_nodes = (node const *) cache->mem;
+    node const *init = &cache_nodes[node_index % num_parent_nodes];
+
+	memcpy(ret, init, sizeof(node));
+	ret->words[0] ^= node_index;
+	SHA3_512(ret->bytes, ret->bytes, sizeof(node));
+
+#if defined(_M_X64) && ENABLE_SSE
+    __m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME);
+    __m128i xmm0 = ret->xmm[0];
+    __m128i xmm1 = ret->xmm[1];
+    __m128i xmm2 = ret->xmm[2];
+    __m128i xmm3 = ret->xmm[3];
+#endif
+
+    for (unsigned i = 0; i != DAG_PARENTS; ++i)
+	{
+        uint32_t parent_index = ((node_index ^ i)*FNV_PRIME ^ ret->words[i % NODE_WORDS]) % num_parent_nodes;
+        node const *parent = &cache_nodes[parent_index];
+
+		#if defined(_M_X64) && ENABLE_SSE
+        {
+            xmm0 = _mm_mullo_epi32(xmm0, fnv_prime);
+            xmm1 = _mm_mullo_epi32(xmm1, fnv_prime);
+            xmm2 = _mm_mullo_epi32(xmm2, fnv_prime);
+            xmm3 = _mm_mullo_epi32(xmm3, fnv_prime);
+            xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]);
+            xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]);
+            xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]);
+            xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]);
+
+            // have to write to ret as values are used to compute index
+            ret->xmm[0] = xmm0;
+            ret->xmm[1] = xmm1;
+            ret->xmm[2] = xmm2;
+            ret->xmm[3] = xmm3;
+        }
+        #else
+        {
+            for (unsigned w = 0; w != NODE_WORDS; ++w) {
+                ret->words[w] = fnv_hash(ret->words[w], parent->words[w]);
+            }
+        }
+		#endif
+    }
+
+	SHA3_512(ret->bytes, ret->bytes, sizeof(node));
+}
+
+void ethash_compute_full_data(
+        void *mem,
+        ethash_params const *params,
+        ethash_cache const *cache) {
+    assert((params->full_size % (sizeof(uint32_t) * MIX_WORDS)) == 0);
+    assert((params->full_size % sizeof(node)) == 0);
+    node *full_nodes = mem;
+
+    // now compute full nodes
+    for (unsigned n = 0; n != (params->full_size / sizeof(node)); ++n) {
+        ethash_calculate_dag_item(&(full_nodes[n]), n, params, cache);
+    }
+}
+
+static void ethash_hash(
+        ethash_return_value * ret,
+        node const *full_nodes,
+        ethash_cache const *cache,
+        ethash_params const *params,
+        const uint8_t header_hash[32],
+        const uint64_t nonce) {
+
+    assert((params->full_size % MIX_WORDS) == 0);
+
+    // pack hash and nonce together into first 40 bytes of s_mix
+    assert(sizeof(node)*8 == 512);
+    node s_mix[MIX_NODES + 1];
+    memcpy(s_mix[0].bytes, header_hash, 32);
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+    s_mix[0].double_words[4] = fix_endian64(nonce);
+#else
+    s_mix[0].double_words[4] = nonce;
+#endif
+
+    // compute sha3-512 hash and replicate across mix
+    SHA3_512(s_mix->bytes, s_mix->bytes, 40);
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+    for (unsigned w = 0; w != 16; ++w) {
+        s_mix[0].words[w] = fix_endian32(s_mix[0].words[w]);
+    }
+#endif
+
+    node* const mix = s_mix + 1;
+    for (unsigned w = 0; w != MIX_WORDS; ++w) {
+        mix->words[w] = s_mix[0].words[w % NODE_WORDS];
+    }
+
+    unsigned const
+            page_size = sizeof(uint32_t) * MIX_WORDS,
+            num_full_pages = (unsigned)(params->full_size / page_size);
+
+
+    for (unsigned i = 0; i != ACCESSES; ++i)
+	{
+        uint32_t const index = ((s_mix->words[0] ^ i)*FNV_PRIME ^ mix->words[i % MIX_WORDS]) % num_full_pages;
+
+        for (unsigned n = 0; n != MIX_NODES; ++n)
+		{
+            const node * dag_node = &full_nodes[MIX_NODES * index + n];
+
+            if (!full_nodes) {
+                node tmp_node;
+                ethash_calculate_dag_item(&tmp_node, index * MIX_NODES + n, params, cache);
+                dag_node = &tmp_node;
+            }
+
+			#if defined(_M_X64) && ENABLE_SSE
+            {
+                __m128i fnv_prime = _mm_set1_epi32(FNV_PRIME);
+                __m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]);
+                __m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]);
+                __m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]);
+                __m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]);
+                mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]);
+                mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]);
+                mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]);
+                mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]);
+            }
+            #else
+            {
+                for (unsigned w = 0; w != NODE_WORDS; ++w) {
+                    mix[n].words[w] = fnv_hash(mix[n].words[w], dag_node->words[w]);
+                }
+            }
+            #endif
+        }
+
+    }
+
+	// compress mix
+	for (unsigned w = 0; w != MIX_WORDS; w += 4)
+	{
+		uint32_t reduction = mix->words[w+0];
+		reduction = reduction*FNV_PRIME ^ mix->words[w+1];
+		reduction = reduction*FNV_PRIME ^ mix->words[w+2];
+		reduction = reduction*FNV_PRIME ^ mix->words[w+3];
+		mix->words[w/4] = reduction;
+	}
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+    for (unsigned w = 0; w != MIX_WORDS/4; ++w) {
+        mix->words[w] = fix_endian32(mix->words[w]);
+    }
+#endif
+
+    memcpy(ret->mix_hash, mix->bytes, 32);
+    // final Keccak hash
+    SHA3_256(ret->result, s_mix->bytes, 64+32);	// Keccak-256(s + compressed_mix)
+}
+
+void ethash_quick_hash(
+        uint8_t return_hash[32],
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32]) {
+
+    uint8_t buf[64+32];
+    memcpy(buf, header_hash, 32);
+#if BYTE_ORDER != LITTLE_ENDIAN
+    nonce = fix_endian64(nonce);
+#endif
+    memcpy(&(buf[32]), &nonce, 8);
+    SHA3_512(buf, buf, 40);
+    memcpy(&(buf[64]), mix_hash, 32);
+    SHA3_256(return_hash, buf, 64+32);
+}
+
+int ethash_quick_check_difficulty(
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32],
+        const uint8_t difficulty[32]) {
+    uint8_t return_hash[32];
+    ethash_quick_hash(return_hash, header_hash, nonce, mix_hash);
+    return ethash_check_difficulty(return_hash, difficulty);
+}
+
+void ethash_full(ethash_return_value * ret, void const *full_mem, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) {
+    ethash_hash(ret, (node const *) full_mem, NULL, params, previous_hash, nonce);
+}
+
+void ethash_light(ethash_return_value * ret, ethash_cache const *cache, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) {
+    ethash_hash(ret, NULL, cache, params, previous_hash, nonce);
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h
@ -0,0 +1,48 @@
+#pragma once
+#include "compiler.h"
+#include "endian.h"
+#include "ethash.h"
+
+#define ENABLE_SSE 1
+
+#if defined(_M_X64) && ENABLE_SSE
+#include <smmintrin.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// compile time settings
+#define NODE_WORDS (64/4)
+#define MIX_WORDS (MIX_BYTES/4)
+#define MIX_NODES (MIX_WORDS / NODE_WORDS)
+#include <stdint.h>
+
+typedef union node {
+    uint8_t bytes[NODE_WORDS * 4];
+    uint32_t words[NODE_WORDS];
+    uint64_t double_words[NODE_WORDS / 2];
+
+#if defined(_M_X64) && ENABLE_SSE
+	__m128i xmm[NODE_WORDS/4];
+#endif
+
+} node;
+
+void ethash_calculate_dag_item(
+        node *const ret,
+        const unsigned node_index,
+        ethash_params const *params,
+        ethash_cache const *cache
+);
+
+void ethash_quick_hash(
+        uint8_t return_hash[32],
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32]);
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c
@ -0,0 +1,151 @@
+/** libkeccak-tiny
+*
+* A single-file implementation of SHA-3 and SHAKE.
+*
+* Implementor: David Leon Gil
+* License: CC0, attribution kindly requested. Blame taken too,
+* but not liability.
+*/
+#include "sha3.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/******** The Keccak-f[1600] permutation ********/
+
+/*** Constants. ***/
+static const uint8_t rho[24] = \
+  { 1,  3,   6, 10, 15, 21,
+        28, 36, 45, 55,  2, 14,
+        27, 41, 56,  8, 25, 43,
+        62, 18, 39, 61, 20, 44};
+static const uint8_t pi[24] = \
+  {10,  7, 11, 17, 18, 3,
+        5, 16,  8, 21, 24, 4,
+        15, 23, 19, 13, 12, 2,
+        20, 14, 22,  9, 6,  1};
+static const uint64_t RC[24] = \
+  {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
+        0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
+        0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
+        0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
+        0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
+        0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
+
+/*** Helper macros to unroll the permutation. ***/
+#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
+#define REPEAT6(e) e e e e e e
+#define REPEAT24(e) REPEAT6(e e e e)
+#define REPEAT5(e) e e e e e
+#define FOR5(v, s, e) \
+  v = 0;            \
+  REPEAT5(e; v += s;)
+
+/*** Keccak-f[1600] ***/
+static inline void keccakf(void* state) {
+    uint64_t* a = (uint64_t*)state;
+    uint64_t b[5] = {0};
+    uint64_t t = 0;
+    uint8_t x, y;
+
+    for (int i = 0; i < 24; i++) {
+        // Theta
+        FOR5(x, 1,
+                b[x] = 0;
+                FOR5(y, 5,
+                        b[x] ^= a[x + y]; ))
+        FOR5(x, 1,
+                FOR5(y, 5,
+                        a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
+        // Rho and pi
+        t = a[1];
+        x = 0;
+        REPEAT24(b[0] = a[pi[x]];
+                a[pi[x]] = rol(t, rho[x]);
+                t = b[0];
+                x++; )
+        // Chi
+        FOR5(y,
+                5,
+                FOR5(x, 1,
+                        b[x] = a[y + x];)
+                FOR5(x, 1,
+                a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
+        // Iota
+        a[0] ^= RC[i];
+    }
+}
+
+/******** The FIPS202-defined functions. ********/
+
+/*** Some helper macros. ***/
+
+#define _(S) do { S } while (0)
+#define FOR(i, ST, L, S) \
+  _(for (size_t i = 0; i < L; i += ST) { S; })
+#define mkapply_ds(NAME, S)                                          \
+  static inline void NAME(uint8_t* dst,                              \
+                          const uint8_t* src,                        \
+                          size_t len) {                              \
+    FOR(i, 1, len, S);                                               \
+  }
+#define mkapply_sd(NAME, S)                                          \
+  static inline void NAME(const uint8_t* src,                        \
+                          uint8_t* dst,                              \
+                          size_t len) {                              \
+    FOR(i, 1, len, S);                                               \
+  }
+
+mkapply_ds(xorin, dst[i] ^= src[i])  // xorin
+mkapply_sd(setout, dst[i] = src[i])  // setout
+
+#define P keccakf
+#define Plen 200
+
+// Fold P*F over the full blocks of an input.
+#define foldP(I, L, F) \
+  while (L >= rate) {  \
+    F(a, I, rate);     \
+    P(a);              \
+    I += rate;         \
+    L -= rate;         \
+  }
+
+/** The sponge-based hash construction. **/
+static inline int hash(uint8_t* out, size_t outlen,
+        const uint8_t* in, size_t inlen,
+        size_t rate, uint8_t delim) {
+    if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) {
+        return -1;
+    }
+    uint8_t a[Plen] = {0};
+    // Absorb input.
+    foldP(in, inlen, xorin);
+    // Xor in the DS and pad frame.
+    a[inlen] ^= delim;
+    a[rate - 1] ^= 0x80;
+    // Xor in the last block.
+    xorin(a, in, inlen);
+    // Apply P
+    P(a);
+    // Squeeze output.
+    foldP(out, outlen, setout);
+    setout(a, out, outlen);
+    memset(a, 0, 200);
+    return 0;
+}
+
+#define defsha3(bits)                                             \
+  int sha3_##bits(uint8_t* out, size_t outlen,                    \
+                  const uint8_t* in, size_t inlen) {              \
+    if (outlen > (bits/8)) {                                      \
+      return -1;                                                  \
+    }                                                             \
+    return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x01);  \
+  }
+
+/*** FIPS202 SHA3 FOFs ***/
+defsha3(256)
+defsha3(512)
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h
@ -0,0 +1,27 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "compiler.h"
+#include <stdint.h>
+#include <stdlib.h>
+
+#define decsha3(bits) \
+  int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+decsha3(256)
+decsha3(512)
+
+static inline void SHA3_256(uint8_t * const ret, uint8_t const *data, const size_t size) {
+    sha3_256(ret, 32, data, size);
+}
+
+static inline void SHA3_512(uint8_t * const ret, uint8_t const *data, const size_t size) {
+    sha3_512(ret, 64, data, size);
+}
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp
@ -0,0 +1,34 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file sha3.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @date 2015
+*/
+
+#include <stdint.h>
+#include <cryptopp/sha3.h>
+
+extern "C" {
+void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size) {
+  CryptoPP::SHA3_256().CalculateDigest(ret, data, size);
+}
+
+void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size) {
+  CryptoPP::SHA3_512().CalculateDigest(ret, data, size);
+}
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h
@ -0,0 +1,15 @@
+#pragma once
+
+#include "compiler.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size);
+void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c
@ -0,0 +1,41 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file util.c
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+#include <stdarg.h>
+#include <stdio.h>
+#include "util.h"
+
+#ifdef _MSC_VER
+
+// foward declare without all of Windows.h
+__declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString);
+
+void debugf(const char *str, ...)
+{
+	va_list args;
+    va_start(args, str);
+
+	char buf[1<<16];
+	_vsnprintf_s(buf, sizeof(buf), sizeof(buf), str, args);
+	buf[sizeof(buf)-1] = '\0';
+	OutputDebugStringA(buf);
+}
+
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h
@ -0,0 +1,47 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file util.h
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+#pragma once
+#include <stdint.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+void debugf(const char *str, ...);
+#else
+#define debugf printf
+#endif
+
+static inline uint32_t min_u32(uint32_t a, uint32_t b)
+{
+	return a < b ? a : b;
+}
+
+static inline uint32_t clamp_u32(uint32_t x, uint32_t min_, uint32_t max_)
+{
+	return x < min_ ? min_ : (x > max_ ? max_ : x);
+}
+
+#ifdef __cplusplus
+}
+#endif
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp
@ -0,0 +1,29 @@
+{
+  "targets":
+    [{
+      "target_name": "ethash",
+        "sources": [
+            './ethash.cc',
+            '../libethash/ethash.h',
+            '../libethash/util.c',
+            '../libethash/util.h',
+            '../libethash/blum_blum_shub.h',
+            '../libethash/blum_blum_shub.c',
+            '../libethash/sha3.h',
+            '../libethash/sha3.c',
+            '../libethash/internal.h',
+            '../libethash/internal.c'
+          ],
+        "include_dirs": [
+          "../",
+          "<!(node -e \"require('nan')\")"
+        ],
+        "cflags": [
+        "-Wall",
+        "-Wno-maybe-uninitialized",
+        "-Wno-uninitialized",
+        "-Wno-unused-function",
+        "-Wextra"
+          ]
+    }]
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc
@ -0,0 +1,587 @@
+#include <nan.h>
+#include <iostream>
+#include <node.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "../libethash/ethash.h"
+
+using namespace v8;
+
+class EthashValidator : public NanAsyncWorker {
+ public:
+  // Constructor
+  EthashValidator(NanCallback *callback, const unsigned blocknumber, const unsigned char * seed)
+    : NanAsyncWorker(callback), blocknumber(blocknumber), seed(seed) {}
+  // Destructor
+  ~EthashValidator() {
+  	free(this->cache);
+	free(this->params);
+  }
+
+  // Executed inside the worker-thread.
+  // It is not safe to access V8, or V8 data structures
+  // here, so everything we need for input and output
+  // should go on `this`.
+  void Execute () {
+	
+    /* this->result = secp256k1_ecdsa_sign(this->msg, this->sig , &this->sig_len, this->pk, NULL, NULL); */
+  }
+
+  // Executed when the async work is complete
+  // this function will be run inside the main event loop
+  // so it is safe to use V8 again
+  void HandleOKCallback () {
+    NanScope();
+    Handle<Value> argv[] = {
+      NanNew<Number>(this->result)
+    };
+    callback->Call(2, argv);
+  }
+
+ protected:
+  const unsigned blocknumber; 
+  const unsigned char * seed;
+  ethash_params * params;
+  ethash_cache * cache;
+  bool result;
+  bool ready = 0;
+};
+
+/* class CompactSignWorker : public SignWorker { */
+/*  public: */
+/*   CompactSignWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *pk ) */
+/*     : SignWorker(callback, msg, pk){} */
+
+/*   void Execute () { */
+/*     this->result = secp256k1_ecdsa_sign_compact(this->msg, this->sig , this->pk, NULL, NULL,  &this->sig_len); */
+/*   } */
+
+/*   void HandleOKCallback () { */
+/*     NanScope(); */
+/*     Handle<Value> argv[] = { */
+/*       NanNew<Number>(this->result), */
+/*       NanNewBufferHandle((char *)this->sig, 64), */
+/*       NanNew<Number>(this->sig_len) */
+/*     }; */
+/*     callback->Call(3, argv); */
+/*   } */
+/* }; */
+
+/* class RecoverWorker : public NanAsyncWorker { */
+/*  public: */
+/*   // Constructor */
+/*   RecoverWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int compressed, int rec_id) */
+/*     : NanAsyncWorker(callback), msg(msg), sig(sig), compressed(compressed), rec_id(rec_id) {} */
+/*   // Destructor */
+/*   ~RecoverWorker() {} */
+
+/*   void Execute () { */
+/*     if(this->compressed == 1){ */
+/*       this->pubkey = new unsigned char[33]; */ 
+/*     }else{ */
+/*       this->pubkey = new unsigned char[65]; */ 
+/*     } */
+
+/*     this->result = secp256k1_ecdsa_recover_compact(this->msg, this->sig, this->pubkey, &this->pubkey_len, this->compressed, this->rec_id); */
+/*   } */
+
+/*   void HandleOKCallback () { */
+/*     NanScope(); */
+/*     Handle<Value> argv[] = { */
+/*       NanNew<Number>(this->result), */
+/*       NanNewBufferHandle((char *)this->pubkey, this->pubkey_len) */
+/*     }; */
+/*     callback->Call(2, argv); */
+/*   } */
+
+/*  protected: */
+/*   const unsigned char * msg; */
+/*   const unsigned char * sig; */ 
+/*   int compressed; */
+/*   int rec_id; */
+/*   int result; */
+/*   unsigned char * pubkey; */
+/*   int pubkey_len; */
+/* }; */
+
+/* class VerifyWorker : public NanAsyncWorker { */
+/*  public: */
+/*   // Constructor */
+/*   VerifyWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int sig_len, const unsigned char *pub_key, int pub_key_len) */
+/*     : NanAsyncWorker(callback), msg(msg), sig(sig), sig_len(sig_len), pub_key(pub_key), pub_key_len(pub_key_len) {} */
+/*   // Destructor */
+/*   ~VerifyWorker() {} */
+
+/*   void Execute () { */
+/*     this->result = secp256k1_ecdsa_verify(this->msg, this->sig, this->sig_len,  this->pub_key, this->pub_key_len); */
+/*   } */
+
+/*   void HandleOKCallback () { */
+/*     NanScope(); */
+/*     Handle<Value> argv[] = { */
+/*       NanNew<Number>(this->result), */
+/*     }; */
+/*     callback->Call(1, argv); */
+/*   } */
+
+/*  protected: */
+/*   int result; */
+/*   const unsigned char * msg; */
+/*   const unsigned char * sig; */
+/*   int sig_len; */ 
+/*   const unsigned char * pub_key; */
+/*   int pub_key_len; */
+/* }; */
+
+/* NAN_METHOD(Verify){ */
+/*   NanScope(); */
+
+/*   Local<Object> pub_buf = args[0].As<Object>(); */
+/*   const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */
+/*   int pub_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   Local<Object> sig_buf = args[2].As<Object>(); */
+/*   const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */
+/*   int sig_len = node::Buffer::Length(args[2]); */
+
+/*   int result = secp256k1_ecdsa_verify(msg_data, sig_data, sig_len, pub_data, pub_len ); */ 
+
+/*   NanReturnValue(NanNew<Number>(result)); */
+/* } */
+
+/* NAN_METHOD(Verify_Async){ */
+/*   NanScope(); */
+
+/*   Local<Object> pub_buf = args[0].As<Object>(); */
+/*   const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */
+/*   int pub_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   Local<Object> sig_buf = args[2].As<Object>(); */
+/*   const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */
+/*   int sig_len = node::Buffer::Length(args[2]); */
+
+/*   Local<Function> callback = args[3].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   VerifyWorker* worker = new VerifyWorker(nanCallback, msg_data, sig_data, sig_len, pub_data, pub_len); */
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Sign){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> pk_buf = args[0].As<Object>(); */
+/*   const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+/*   //the second argument is the message that we are signing */
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   unsigned char sig[72]; */
+/*   int sig_len = 72; */
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   int result = secp256k1_ecdsa_sign(msg_data, sig , &sig_len, pk_data, NULL, NULL); */
+
+/*   if(result == 1){ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sig, sig_len)); */
+/*   }else{ */
+/*     return NanThrowError("nonce invalid, try another one"); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Sign_Async){ */
+
+/*   NanScope(); */
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> sec_buf = args[0].As<Object>(); */
+/*   const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+/*   //the second argument is the message that we are signing */
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   Local<Function> callback = args[2].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   SignWorker* worker = new SignWorker(nanCallback, msg_data, sec_data); */
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Sign_Compact){ */
+
+/*   NanScope(); */
+
+/*   Local<Object> seckey_buf = args[0].As<Object>(); */
+/*   const unsigned char *seckey_data = (unsigned char *) node::Buffer::Data(seckey_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   unsigned char sig[64]; */
+/*   int rec_id; */
+
+/*   //TODO: change the nonce */
+/*   int valid_nonce = secp256k1_ecdsa_sign_compact(msg_data, sig, seckey_data, NULL, NULL, &rec_id ); */
+
+/*   Local<Array> array = NanNew<Array>(3); */
+/*   array->Set(0, NanNew<Integer>(valid_nonce)); */
+/*   array->Set(1, NanNew<Integer>(rec_id)); */
+/*   array->Set(2, NanNewBufferHandle((char *)sig, 64)); */
+
+/*   NanReturnValue(array); */
+/* } */
+
+/* NAN_METHOD(Sign_Compact_Async){ */
+/*   NanScope(); */
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> sec_buf = args[0].As<Object>(); */
+/*   const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+
+/*   //the second argument is the message that we are signing */
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+
+/*   Local<Function> callback = args[2].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   CompactSignWorker* worker = new CompactSignWorker(nanCallback, msg_data, sec_data); */ 
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Recover_Compact){ */
+
+/*   NanScope(); */
+  
+/*   Local<Object> msg_buf = args[0].As<Object>(); */
+/*   const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */
+/*   int msg_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> sig_buf = args[1].As<Object>(); */
+/*   const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */
+
+/*   Local<Number> compressed = args[2].As<Number>(); */
+/*   int int_compressed = compressed->IntegerValue(); */
+
+/*   Local<Number> rec_id = args[3].As<Number>(); */
+/*   int int_rec_id = rec_id->IntegerValue(); */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   unsigned char pubKey[65]; */ 
+
+/*   int pubKeyLen; */
+
+/*   int result = secp256k1_ecdsa_recover_compact(msg, sig, pubKey, &pubKeyLen, int_compressed, int_rec_id); */
+/*   if(result == 1){ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */
+/*   }else{ */
+    
+/*     NanReturnValue(NanFalse()); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Recover_Compact_Async){ */
+
+/*   NanScope(); */
+  
+/*   //the message */
+/*   Local<Object> msg_buf = args[0].As<Object>(); */
+/*   const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */
+/*   int msg_len = node::Buffer::Length(args[0]); */
+
+/*   //the signature length */
+/*   Local<Object> sig_buf = args[1].As<Object>(); */
+/*   const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */
+/*   //todo sig len needs tobe 64 */
+/*   int sig_len = node::Buffer::Length(args[1]); */
+
+/*   //to compress or not? */
+/*   Local<Number> compressed = args[2].As<Number>(); */
+/*   int int_compressed = compressed->IntegerValue(); */
+
+/*   //the rec_id */
+/*   Local<Number> rec_id = args[3].As<Number>(); */
+/*   int int_rec_id = rec_id->IntegerValue(); */
+
+/*   //the callback */
+/*   Local<Function> callback = args[4].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   if(sig_len != 64){ */
+/*     return NanThrowError("the signature needs to be 64 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   RecoverWorker* worker = new RecoverWorker(nanCallback, msg, sig, int_compressed, int_rec_id); */
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Seckey_Verify){ */
+/*   NanScope(); */
+
+/*   const unsigned char *data = (const unsigned char*) node::Buffer::Data(args[0]); */
+/*   int result =  secp256k1_ec_seckey_verify(data); */ 
+/*   NanReturnValue(NanNew<Number>(result)); */ 
+/* } */
+
+/* NAN_METHOD(Pubkey_Verify){ */
+
+/*   NanScope(); */
+  
+/*   Local<Object> pub_buf = args[0].As<Object>(); */
+/*   const unsigned char *pub_key = (unsigned char *) node::Buffer::Data(pub_buf); */
+/*   int pub_key_len = node::Buffer::Length(args[0]); */
+
+/*   int result = secp256k1_ec_pubkey_verify(pub_key, pub_key_len); */
+
+/*   NanReturnValue(NanNew<Number>(result)); */ 
+/* } */
+
+/* NAN_METHOD(Pubkey_Create){ */
+/*   NanScope(); */
+
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Number> l_compact = args[1].As<Number>(); */
+/*   int compact = l_compact->IntegerValue(); */
+/*   int pubKeyLen; */
+
+/*   if(pk_len != 32){ */
+/*     return NanThrowError("the secert key need to be 32 bytes"); */
+/*   } */
+
+/*   unsigned char *pubKey; */
+/*   if(compact == 1){ */
+/*     pubKey = new unsigned char[33]; */ 
+/*   }else{ */
+/*     pubKey = new unsigned char[65]; */ 
+/*   } */
+
+/*   int results = secp256k1_ec_pubkey_create(pubKey,&pubKeyLen, pk_data, compact ); */
+/*   if(results == 0){ */
+/*     return NanThrowError("secret was invalid, try again."); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Decompress){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> pk_buf = args[0].As<Object>(); */
+/*   unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   int results = secp256k1_ec_pubkey_decompress(pk_data, &pk_len); */
+
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid public key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pk_data, pk_len)); */
+/*   } */
+/* } */
+
+
+/* NAN_METHOD(Privkey_Import){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   unsigned char sec_key[32]; */
+/*   int results = secp256k1_ec_privkey_import(sec_key, pk_data, pk_len); */
+
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid private key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sec_key, 32)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Privkey_Export){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> sk_buf = args[0].As<Object>(); */
+/*   const unsigned char *sk_data = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/*   Local<Number> l_compressed = args[1].As<Number>(); */
+/*   int compressed = l_compressed->IntegerValue(); */
+
+/*   unsigned char *privKey; */
+/*   int pk_len; */
+/*   int results = secp256k1_ec_privkey_export(sk_data, privKey, &pk_len, compressed); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid private key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)privKey, pk_len)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Privkey_Tweak_Add){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> sk_buf = args[0].As<Object>(); */
+/*   unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_privkey_tweak_add(sk, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Privkey_Tweak_Mul){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> sk_buf = args[0].As<Object>(); */
+/*   unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_privkey_tweak_mul(sk, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Tweak_Add){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_pubkey_tweak_add(pk, pk_len, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Tweak_Mul){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_pubkey_tweak_mul(pk, pk_len, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */
+/*   } */
+/* } */
+
+void Init(Handle<Object> exports) {
+
+  /* secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY); */
+  /* exports->Set(NanNew("seckeyVerify"), NanNew<FunctionTemplate>(Seckey_Verify)->GetFunction()); */
+  /* exports->Set(NanNew("sign"), NanNew<FunctionTemplate>(Sign)->GetFunction()); */
+  /* exports->Set(NanNew("signAsync"), NanNew<FunctionTemplate>(Sign_Async)->GetFunction()); */
+  /* exports->Set(NanNew("signCompact"), NanNew<FunctionTemplate>(Sign_Compact)->GetFunction()); */
+  /* exports->Set(NanNew("signCompactAsync"), NanNew<FunctionTemplate>(Sign_Compact_Async)->GetFunction()); */
+  /* exports->Set(NanNew("recoverCompact"), NanNew<FunctionTemplate>(Recover_Compact)->GetFunction()); */
+  /* exports->Set(NanNew("recoverCompactAsync"), NanNew<FunctionTemplate>(Recover_Compact_Async)->GetFunction()); */
+  /* exports->Set(NanNew("verify"), NanNew<FunctionTemplate>(Verify)->GetFunction()); */
+  /* exports->Set(NanNew("verifyAsync"), NanNew<FunctionTemplate>(Verify_Async)->GetFunction()); */
+  /* exports->Set(NanNew("secKeyVerify"), NanNew<FunctionTemplate>(Seckey_Verify)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyVerify"), NanNew<FunctionTemplate>(Pubkey_Verify)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyCreate"), NanNew<FunctionTemplate>(Pubkey_Create)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyDecompress"), NanNew<FunctionTemplate>(Pubkey_Decompress)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyExport"), NanNew<FunctionTemplate>(Privkey_Export)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyImport"), NanNew<FunctionTemplate>(Privkey_Import)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyTweakAdd"), NanNew<FunctionTemplate>(Privkey_Tweak_Add)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyTweakMul"), NanNew<FunctionTemplate>(Privkey_Tweak_Mul)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyTweakAdd"), NanNew<FunctionTemplate>(Privkey_Tweak_Add)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyTweakMul"), NanNew<FunctionTemplate>(Privkey_Tweak_Mul)->GetFunction()); */
+}
+
+NODE_MODULE(secp256k1, Init)
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json
@ -0,0 +1,13 @@
+{
+  "name": "node-ethash",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1",
+    "install": "node-gyp rebuild"
+  },
+  "author": "",
+  "license": "ISC",
+  "gypfile": true
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md
@ -0,0 +1,12 @@
+# To Develop
+`npm install -g node-gyp`  
+`npm install .`  
+
+
+# To rebuild 
+`node-gyp rebuild`  
+
+
+# notes
+
+nan is good https://github.com/rvagg/nan
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt
@ -0,0 +1,30 @@
+IF( NOT Boost_FOUND )
+    find_package(Boost COMPONENTS unit_test_framework)
+ENDIF()
+
+IF( Boost_FOUND )
+    include_directories( ${Boost_INCLUDE_DIR} )
+    include_directories(..)
+
+    link_directories ( ${Boost_LIBRARY_DIRS} )
+    file(GLOB HEADERS "*.h")
+    ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK)
+
+    if (NOT CRYPTOPP_FOUND)
+	    find_package (CryptoPP)
+    endif()
+
+    if (CRYPTOPP_FOUND)
+	    add_definitions(-DWITH_CRYPTOPP)
+    endif()
+
+    add_executable (Test test.cpp ${HEADERS})
+    target_link_libraries (Test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${ETHHASH_LIBS})
+
+    if (CRYPTOPP_FOUND)
+	    TARGET_LINK_LIBRARIES(Test ${CRYPTOPP_LIBRARIES})
+    endif()
+
+    enable_testing ()
+    add_test(NAME ethash COMMAND Test)
+ENDIF()
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go
@ -0,0 +1,54 @@
+package ethashTest
+
+import (
+	"bytes"
+	"crypto/rand"
+	"log"
+	"math/big"
+	"testing"
+
+	"github.com/ethereum/ethash"
+	"github.com/ethereum/go-ethereum/core"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+func TestEthash(t *testing.T) {
+	seedHash := make([]byte, 32)
+	_, err := rand.Read(seedHash)
+	if err != nil {
+		panic(err)
+	}
+
+	db, err := ethdb.NewMemDatabase()
+	if err != nil {
+		panic(err)
+	}
+
+	blockProcessor, err := core.NewCanonical(5, db)
+	if err != nil {
+		panic(err)
+	}
+
+	log.Println("Block Number: ", blockProcessor.ChainManager().CurrentBlock().Number())
+
+	e := ethash.New(blockProcessor.ChainManager())
+
+	miningHash := make([]byte, 32)
+	if _, err := rand.Read(miningHash); err != nil {
+		panic(err)
+	}
+	diff := big.NewInt(10000)
+	log.Println("difficulty", diff)
+
+	nonce := uint64(0)
+
+	ghash_full := e.FullHash(nonce, miningHash)
+	log.Printf("ethash full (on nonce): %x %x\n", ghash_full, nonce)
+
+	ghash_light := e.LightHash(nonce, miningHash)
+	log.Printf("ethash light (on nonce): %x %x\n", ghash_light, nonce)
+
+	if bytes.Compare(ghash_full, ghash_light) != 0 {
+		t.Errorf("full: %x, light: %x", ghash_full, ghash_light)
+	}
+}
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp
@ -0,0 +1,233 @@
+#include <iomanip>
+#include <libethash/fnv.h>
+#include <libethash/ethash.h>
+#include <libethash/internal.h>
+
+#ifdef WITH_CRYPTOPP
+#include <libethash/sha3_cryptopp.h>
+#else
+#include <libethash/sha3.h>
+#endif // WITH_CRYPTOPP
+
+#define BOOST_TEST_MODULE Daggerhashimoto
+#define BOOST_TEST_MAIN
+
+#include <boost/test/unit_test.hpp>
+#include <libethash/ethash.h>
+#include <iostream>
+
+std::string bytesToHexString(const uint8_t *str, const size_t s) {
+    std::ostringstream ret;
+
+    for (int i = 0; i < s; ++i)
+        ret << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int) str[i];
+
+    return ret.str();
+}
+
+BOOST_AUTO_TEST_CASE(fnv_hash_check) {
+    uint32_t x = 1235U;
+    const uint32_t
+            y = 9999999U,
+            expected = (FNV_PRIME * x) ^ y;
+
+    x = fnv_hash(x, y);
+
+    BOOST_REQUIRE_MESSAGE(x == expected,
+            "\nexpected: " << expected << "\n"
+                    << "actual: " << x << "\n");
+
+}
+
+BOOST_AUTO_TEST_CASE(SHA256_check) {
+    uint8_t input[32], out[32];
+    memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+    SHA3_256(out, input, 32);
+    const std::string
+            expected = "2b5ddf6f4d21c23de216f44d5e4bdc68e044b71897837ea74c83908be7037cd7",
+            actual = bytesToHexString(out, 32);
+    BOOST_REQUIRE_MESSAGE(expected == actual,
+            "\nexpected: " << expected.c_str() << "\n"
+                    << "actual: " << actual.c_str() << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(SHA512_check) {
+    uint8_t input[64], out[64];
+    memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 64);
+    SHA3_512(out, input, 64);
+    const std::string
+            expected = "0be8a1d334b4655fe58c6b38789f984bb13225684e86b20517a55ab2386c7b61c306f25e0627c60064cecd6d80cd67a82b3890bd1289b7ceb473aad56a359405",
+            actual = bytesToHexString(out, 64);
+    BOOST_REQUIRE_MESSAGE(expected == actual,
+            "\nexpected: " << expected.c_str() << "\n"
+                    << "actual: " << actual.c_str() << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_check) {
+    ethash_params params;
+    ethash_params_init(&params, 0);
+    BOOST_REQUIRE_MESSAGE(params.full_size  < DAGSIZE_BYTES_INIT,
+            "\nfull size: " << params.full_size << "\n"
+                    << "should be less than or equal to: " << DAGSIZE_BYTES_INIT << "\n");
+    BOOST_REQUIRE_MESSAGE(params.full_size + 20*MIX_BYTES >= DAGSIZE_BYTES_INIT,
+            "\nfull size + 20*MIX_BYTES: " << params.full_size + 20*MIX_BYTES << "\n"
+                    << "should be greater than or equal to: " << DAGSIZE_BYTES_INIT << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size < DAGSIZE_BYTES_INIT / 32,
+            "\ncache size: " << params.cache_size << "\n"
+                    << "should be less than or equal to: " << DAGSIZE_BYTES_INIT / 32 << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_calcifide_check) {
+    ethash_params params;
+    ethash_params_init(&params, 0);
+    const uint32_t expected_full_size = 1073739904;
+    const uint32_t expected_cache_size = 1048384;
+    BOOST_REQUIRE_MESSAGE(params.full_size  == expected_full_size,
+            "\nexpected: " << expected_cache_size << "\n"
+                    << "actual: " << params.full_size << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size  == expected_cache_size,
+            "\nexpected: " << expected_cache_size << "\n"
+                    << "actual: " << params.cache_size << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_check) {
+    ethash_params params;
+    ethash_params_init(&params, 1971000);
+    const uint64_t nine_month_size = (uint64_t) 8*DAGSIZE_BYTES_INIT;
+    BOOST_REQUIRE_MESSAGE(params.full_size  < nine_month_size,
+            "\nfull size: " << params.full_size << "\n"
+                    << "should be less than or equal to: " << nine_month_size << "\n");
+    BOOST_REQUIRE_MESSAGE(params.full_size + DAGSIZE_BYTES_INIT / 4 > nine_month_size,
+            "\nfull size + DAGSIZE_BYTES_INIT / 4: " << params.full_size + DAGSIZE_BYTES_INIT / 4 << "\n"
+                    << "should be greater than or equal to: " << nine_month_size << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size < nine_month_size / 1024,
+            "\nactual cache size: " << params.cache_size << "\n"
+                    << "expected: " << nine_month_size / 1024 << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 > nine_month_size / 1024 ,
+            "\ncache size + DAGSIZE_BYTES_INIT / 4 / 1024: " << params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 << "\n"
+                    << "actual: " << nine_month_size / 32 << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(light_and_full_client_checks) {
+    ethash_params params;
+    uint8_t seed[32], hash[32];
+    ethash_return_value light_out, full_out;
+    memcpy(seed, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+    memcpy(hash, "~~~X~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+    ethash_params_init(&params, 0);
+    params.cache_size = 1024;
+    params.full_size = 1024 * 32;
+    ethash_cache cache;
+    cache.mem = alloca(params.cache_size);
+    ethash_mkcache(&cache, &params, seed);
+    node * full_mem = (node *) alloca(params.full_size);
+    ethash_compute_full_data(full_mem, &params, &cache);
+
+    {
+        const std::string
+                expected = "2da2b506f21070e1143d908e867962486d6b0a02e31d468fd5e3a7143aafa76a14201f63374314e2a6aaf84ad2eb57105dea3378378965a1b3873453bb2b78f9a8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995c259440b89fa3481c2c33171477c305c8e1e421f8d8f6d59585449d0034f3e421808d8da6bbd0b6378f567647cc6c4ba6c434592b198ad444e7284905b7c6adaf70bf43ec2daa7bd5e8951aa609ab472c124cf9eba3d38cff5091dc3f58409edcc386c743c3bd66f92408796ee1e82dd149eaefbf52b00ce33014a6eb3e50625413b072a58bc01da28262f42cbe4f87d4abc2bf287d15618405a1fe4e386fcdafbb171064bd99901d8f81dd6789396ce5e364ac944bbbd75a7827291c70b42d26385910cd53ca535ab29433dd5c5714d26e0dce95514c5ef866329c12e958097e84462197c2b32087849dab33e88b11da61d52f9dbc0b92cc61f742c07dbbf751c49d7678624ee60dfbe62e5e8c47a03d8247643f3d16ad8c8e663953bcda1f59d7e2d4a9bf0768e789432212621967a8f41121ad1df6ae1fa78782530695414c6213942865b2730375019105cae91a4c17a558d4b63059661d9f108362143107babe0b848de412e4da59168cce82bfbff3c99e022dd6ac1e559db991f2e3f7bb910cefd173e65ed00a8d5d416534e2c8416ff23977dbf3eb7180b75c71580d08ce95efeb9b0afe904ea12285a392aff0c8561ff79fca67f694a62b9e52377485c57cc3598d84cac0a9d27960de0cc31ff9bbfe455acaa62c8aa5d2cce96f345da9afe843d258a99c4eaf3650fc62efd81c7b81cd0d534d2d71eeda7a6e315d540b4473c80f8730037dc2ae3e47b986240cfc65ccc565f0d8cde0bc68a57e39a271dda57440b3598bee19f799611d25731a96b5dbbbefdff6f4f656161462633030d62560ea4e9c161cf78fc96a2ca5aaa32453a6c5dea206f766244e8c9d9a8dc61185ce37f1fc804459c5f07434f8ecb34141b8dcae7eae704c950b55556c5f40140c3714b45eddb02637513268778cbf937a33e4e33183685f9deb31ef54e90161e76d969587dd782eaa94e289420e7c2ee908517f5893a26fdb5873d68f92d118d4bcf98d7a4916794d6ab290045e30f9ea00ca547c584b8482b0331ba1539a0f2714fddc3a0b06b0cfbb6a607b8339c39bcfd6640b1f653e9d70ef6c985b",
+                actual = bytesToHexString((uint8_t const *) cache.mem, params.cache_size);
+
+        BOOST_REQUIRE_MESSAGE(expected == actual,
+                "\nexpected: " << expected.c_str() << "\n"
+                        << "actual: " << actual.c_str() << "\n");
+    }
+
+
+
+    {
+        node node;
+        ethash_calculate_dag_item(&node, 0, &params, &cache);
+        const std::string
+                actual = bytesToHexString((uint8_t const *) &node, sizeof(node)),
+                expected = "b1698f829f90b35455804e5185d78f549fcb1bdce2bee006d4d7e68eb154b596be1427769eb1c3c3e93180c760af75f81d1023da6a0ffbe321c153a7c0103597";
+        BOOST_REQUIRE_MESSAGE(actual == expected,
+                "\n" << "expected: " << expected.c_str() << "\n"
+                        << "actual: " << actual.c_str() << "\n");
+    }
+
+    {
+        for (int i = 0 ; i < params.full_size / sizeof(node) ; ++i ) {
+            for (uint32_t j = 0; j < 32; ++j) {
+                node expected_node;
+                ethash_calculate_dag_item(&expected_node, j, &params, &cache);
+                const std::string
+                        actual = bytesToHexString((uint8_t const *) &(full_mem[j]), sizeof(node)),
+                        expected = bytesToHexString((uint8_t const *) &expected_node, sizeof(node));
+                BOOST_REQUIRE_MESSAGE(actual == expected,
+                        "\ni: " << j << "\n"
+                                << "expected: " << expected.c_str() << "\n"
+                                << "actual: " << actual.c_str() << "\n");
+            }
+        }
+    }
+
+    {
+        uint64_t nonce = 0x7c7c597c;
+        ethash_full(&full_out, full_mem, &params, hash, nonce);
+        ethash_light(&light_out, &cache, &params, hash, nonce);
+        const std::string
+                light_result_string = bytesToHexString(light_out.result, 32),
+                full_result_string = bytesToHexString(full_out.result, 32);
+        BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string,
+                "\nlight result: " << light_result_string.c_str() << "\n"
+                        << "full result: " << full_result_string.c_str() << "\n");
+        const std::string
+                light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32),
+                full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32);
+        BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string,
+                "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n"
+                        << "full mix hash: " << full_mix_hash_string.c_str() << "\n");
+        uint8_t check_hash[32];
+        ethash_quick_hash(check_hash, hash, nonce, full_out.mix_hash);
+        const std::string check_hash_string = bytesToHexString(check_hash, 32);
+        BOOST_REQUIRE_MESSAGE(check_hash_string == full_result_string,
+                "\ncheck hash string: " << check_hash_string.c_str() << "\n"
+                        << "full result: " << full_result_string.c_str() << "\n");
+    }
+    {
+        ethash_full(&full_out, full_mem, &params, hash, 5);
+        std::string
+                light_result_string = bytesToHexString(light_out.result, 32),
+                full_result_string = bytesToHexString(full_out.result, 32);
+
+        BOOST_REQUIRE_MESSAGE(light_result_string != full_result_string,
+                "\nlight result and full result should differ: " << light_result_string.c_str() << "\n");
+
+        ethash_light(&light_out, &cache, &params, hash, 5);
+        light_result_string = bytesToHexString(light_out.result, 32);
+        BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string,
+                "\nlight result and full result should be the same\n"
+                        << "light result: " << light_result_string.c_str() << "\n"
+                        << "full result: " << full_result_string.c_str() << "\n");
+        std::string
+                light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32),
+                full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32);
+        BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string,
+                "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n"
+                        << "full mix hash: " << full_mix_hash_string.c_str() << "\n");
+    }
+}
+
+BOOST_AUTO_TEST_CASE(ethash_check_difficulty_check) {
+    uint8_t hash[32], target[32];
+    memset(hash, 0, 32);
+    memset(target, 0, 32);
+
+    memcpy(hash, "11111111111111111111111111111111", 32);
+    memcpy(target, "22222222222222222222222222222222", 32);
+    BOOST_REQUIRE_MESSAGE(
+            ethash_check_difficulty(hash, target),
+            "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n");
+    BOOST_REQUIRE_MESSAGE(
+            !ethash_check_difficulty(hash, hash),
+            "\nexpected \"" << hash << "\" to have the same difficulty as \"" << hash << "\"\n");
+    memcpy(target, "11111111111111111111111111111112", 32);
+    BOOST_REQUIRE_MESSAGE(
+            ethash_check_difficulty(hash, target),
+            "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n");
+    memcpy(target, "11111111111111111111111111111110", 32);
+    BOOST_REQUIRE_MESSAGE(
+            !ethash_check_difficulty(hash, target),
+            "\nexpected \"" << hash << "\" to have more difficulty than \"" << target << "\"\n");
+}
--- a/core/block_processor.go
+++ b/core/block_processor.go
@ -7,12 +7,12 @@ import (
 	"sync"
 	"time"

+	"github.com/ethereum/ethash"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/ethutil"
 	"github.com/ethereum/go-ethereum/event"
 	"github.com/ethereum/go-ethereum/logger"
 	"github.com/ethereum/go-ethereum/pow"
-	"github.com/ethereum/go-ethereum/pow/ezp"
 	"github.com/ethereum/go-ethereum/state"
 	"gopkg.in/fatih/set.v0"
 )
@ -50,7 +50,7 @@ func NewBlockProcessor(db ethutil.Database, txpool *TxPool, chainManager *ChainM
 	sm := &BlockProcessor{
 		db:       db,
 		mem:      make(map[string]*big.Int),
-		Pow:      ezp.New(),
+		Pow:      ethash.New(chainManager),
 		bc:       chainManager,
 		eventMux: eventMux,
 		txpool:   txpool,
@ -255,6 +255,7 @@ func (sm *BlockProcessor) ValidateBlock(block, parent *types.Block) error {
 		return fmt.Errorf("GasLimit check failed for block %v, %v", block.Header().GasLimit, expl)
 	}

+	// There can be at most one uncle
 	if len(block.Uncles()) > 1 {
 		return ValidationError("Block can only contain one uncle (contained %v)", len(block.Uncles()))
 	}
--- a/core/chain_makers.go
+++ b/core/chain_makers.go
@ -13,21 +13,31 @@ import (
 // So we can generate blocks easily
 type FakePow struct{}

-func (f FakePow) Search(block pow.Block, stop <-chan struct{}) []byte { return nil }
-func (f FakePow) Verify(block pow.Block) bool                         { return true }
-func (f FakePow) GetHashrate() int64                                  { return 0 }
-func (f FakePow) Turbo(bool)                                          {}
+func (f FakePow) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
+	return nil, nil, nil
+}
+func (f FakePow) Verify(block pow.Block) bool { return true }
+func (f FakePow) GetHashrate() int64          { return 0 }
+func (f FakePow) Turbo(bool)                  {}

+// So we can deterministically seed different blockchains
+var (
+	CanonicalSeed = 1
+	ForkSeed      = 2
+)
+
+// Utility functions for making chains on the fly
+// Exposed for sake of testing from other packages (eg. go-ethash)
 func NewBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 	return newBlockFromParent(addr, parent)
 }

-func MakeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database) *types.Block {
-	return makeBlock(bman, parent, i, db)
+func MakeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database, seed int) *types.Block {
+	return makeBlock(bman, parent, i, db, seed)
 }

-func MakeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database) types.Blocks {
-	return makeChain(bman, parent, max, db)
+func MakeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database, seed int) types.Blocks {
+	return makeChain(bman, parent, max, db, seed)
 }

 func NewChainMan(block *types.Block, eventMux *event.TypeMux, db ethutil.Database) *ChainManager {
@ -42,9 +52,9 @@ func NewCanonical(n int, db ethutil.Database) (*BlockProcessor, error) {
 	return newCanonical(n, db)
 }

+// block time is fixed at 10 seconds
 func newBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 	block := types.NewBlock(parent.Hash(), addr, parent.Root(), ethutil.BigPow(2, 32), nil, "")
-
 	block.SetUncles(nil)
 	block.SetTransactions(nil)
 	block.SetReceipts(nil)
@ -52,6 +62,7 @@ func newBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 	header := block.Header()
 	header.Difficulty = CalcDifficulty(block, parent)
 	header.Number = new(big.Int).Add(parent.Header().Number, ethutil.Big1)
+	header.Time = parent.Header().Time + 10
 	header.GasLimit = CalcGasLimit(parent, block)

 	block.Td = parent.Td
@ -60,8 +71,10 @@ func newBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 }

 // Actually make a block by simulating what miner would do
-func makeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database) *types.Block {
+// we seed chains by the first byte of the coinbase
+func makeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database, seed int) *types.Block {
 	addr := ethutil.LeftPadBytes([]byte{byte(i)}, 20)
+	addr[0] = byte(seed)
 	block := newBlockFromParent(addr, parent)
 	state := state.New(block.Root(), db)
 	cbase := state.GetOrNewStateObject(addr)
@ -74,11 +87,11 @@ func makeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Data

 // Make a chain with real blocks
 // Runs ProcessWithParent to get proper state roots
-func makeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database) types.Blocks {
+func makeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database, seed int) types.Blocks {
 	bman.bc.currentBlock = parent
 	blocks := make(types.Blocks, max)
 	for i := 0; i < max; i++ {
-		block := makeBlock(bman, parent, i, db)
+		block := makeBlock(bman, parent, i, db, seed)
 		td, err := bman.processWithParent(block, parent)
 		if err != nil {
 			fmt.Println("process with parent failed", err)
@ -87,9 +100,7 @@ func makeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Da
 		block.Td = td
 		blocks[i] = block
 		parent = block
-		fmt.Printf("New Block: %x\n", block.Hash())
 	}
-	fmt.Println("Done making chain")
 	return blocks
 }

@ -113,7 +124,7 @@ func newBlockProcessor(db ethutil.Database, txpool *TxPool, cman *ChainManager,
 	return bman
 }

-// Make a new canonical chain by running InsertChain
+// Make a new, deterministic canonical chain by running InsertChain
 // on result of makeChain
 func newCanonical(n int, db ethutil.Database) (*BlockProcessor, error) {
 	eventMux := &event.TypeMux{}
@ -125,7 +136,7 @@ func newCanonical(n int, db ethutil.Database) (*BlockProcessor, error) {
 	if n == 0 {
 		return bman, nil
 	}
-	lchain := makeChain(bman, parent, n, db)
-	bman.bc.InsertChain(lchain)
-	return bman, nil
+	lchain := makeChain(bman, parent, n, db, CanonicalSeed)
+	err := bman.bc.InsertChain(lchain)
+	return bman, err
 }
--- a/core/chain_manager_test.go
+++ b/core/chain_manager_test.go
@ -24,12 +24,6 @@ func init() {

 // Test fork of length N starting from block i
 func testFork(t *testing.T, bman *BlockProcessor, i, N int, f func(td1, td2 *big.Int)) {
-	fmt.Println("Testing Fork!")
-	var b *types.Block = nil
-	if i > 0 {
-		b = bman.bc.GetBlockByNumber(uint64(i))
-	}
-	_ = b
 	// switch databases to process the new chain
 	db, err := ethdb.NewMemDatabase()
 	if err != nil {
@ -40,13 +34,25 @@ func testFork(t *testing.T, bman *BlockProcessor, i, N int, f func(td1, td2 *big
 	if err != nil {
 		t.Fatal("could not make new canonical in testFork", err)
 	}
+	// asert the bmans have the same block at i
+	bi1 := bman.bc.GetBlockByNumber(uint64(i)).Hash()
+	bi2 := bman2.bc.GetBlockByNumber(uint64(i)).Hash()
+	if bytes.Compare(bi1, bi2) != 0 {
+		t.Fatal("chains do not have the same hash at height", i)
+	}
+
 	bman2.bc.SetProcessor(bman2)

+	// extend the fork
 	parent := bman2.bc.CurrentBlock()
-	chainB := makeChain(bman2, parent, N, db)
-	bman2.bc.InsertChain(chainB)
+	chainB := makeChain(bman2, parent, N, db, ForkSeed)
+	err = bman2.bc.InsertChain(chainB)
+	if err != nil {
+		t.Fatal("Insert chain error for fork:", err)
+	}

 	tdpre := bman.bc.Td()
+	// Test the fork's blocks on the original chain
 	td, err := testChain(chainB, bman)
 	if err != nil {
 		t.Fatal("expected chainB not to give errors:", err)
@ -55,6 +61,14 @@ func testFork(t *testing.T, bman *BlockProcessor, i, N int, f func(td1, td2 *big
 	f(tdpre, td)
 }

+func printChain(bc *ChainManager) {
+	for i := bc.CurrentBlock().Number().Uint64(); i > 0; i-- {
+		b := bc.GetBlockByNumber(uint64(i))
+		fmt.Printf("\t%x\n", b.Hash())
+	}
+}
+
+// process blocks against a chain
 func testChain(chainB types.Blocks, bman *BlockProcessor) (*big.Int, error) {
 	td := new(big.Int)
 	for _, block := range chainB {
@ -102,12 +116,13 @@ func insertChain(done chan bool, chainMan *ChainManager, chain types.Blocks, t *
 }

 func TestExtendCanonical(t *testing.T) {
+	CanonicalLength := 5
 	db, err := ethdb.NewMemDatabase()
 	if err != nil {
 		t.Fatal("Failed to create db:", err)
 	}
 	// make first chain starting from genesis
-	bman, err := newCanonical(5, db)
+	bman, err := newCanonical(CanonicalLength, db)
 	if err != nil {
 		t.Fatal("Could not make new canonical chain:", err)
 	}
@ -116,11 +131,11 @@ func TestExtendCanonical(t *testing.T) {
 			t.Error("expected chainB to have higher difficulty. Got", td2, "expected more than", td1)
 		}
 	}
-	// Start fork from current height (5)
-	testFork(t, bman, 5, 1, f)
-	testFork(t, bman, 5, 2, f)
-	testFork(t, bman, 5, 5, f)
-	testFork(t, bman, 5, 10, f)
+	// Start fork from current height (CanonicalLength)
+	testFork(t, bman, CanonicalLength, 1, f)
+	testFork(t, bman, CanonicalLength, 2, f)
+	testFork(t, bman, CanonicalLength, 5, f)
+	testFork(t, bman, CanonicalLength, 10, f)
 }

 func TestShorterFork(t *testing.T) {
@ -189,6 +204,7 @@ func TestEqualFork(t *testing.T) {
 	}
 	// Sum of numbers must be equal to 10
 	// for this to be an equal fork
+	testFork(t, bman, 0, 10, f)
 	testFork(t, bman, 1, 9, f)
 	testFork(t, bman, 2, 8, f)
 	testFork(t, bman, 5, 5, f)
@ -215,7 +231,7 @@ func TestBrokenChain(t *testing.T) {
 	}
 	bman2.bc.SetProcessor(bman2)
 	parent := bman2.bc.CurrentBlock()
-	chainB := makeChain(bman2, parent, 5, db2)
+	chainB := makeChain(bman2, parent, 5, db2, ForkSeed)
 	chainB = chainB[1:]
 	_, err = testChain(chainB, bman)
 	if err == nil {
--- a/core/transaction_pool.go
+++ b/core/transaction_pool.go
@ -121,7 +121,7 @@ func (self *TxPool) add(tx *types.Transaction) error {
 	if len(tx.From()) > 0 {
 		from = ethutil.Bytes2Hex(tx.From()[:4])
 	} else {
-		from = "INVALID"
+		return errors.New(fmt.Sprintf("FROM ADDRESS MUST BE POSITIVE (was %v)", tx.From()))
 	}
 	txplogger.Debugf("(t) %x => %s (%v) %x\n", from, to, tx.Value, tx.Hash())

--- a/core/types/block.go
+++ b/core/types/block.go
@ -48,7 +48,20 @@ type Header struct {
 }

 func (self *Header) rlpData(withNonce bool) []interface{} {
-	fields := []interface{}{self.ParentHash, self.UncleHash, self.Coinbase, self.Root, self.TxHash, self.ReceiptHash, self.Bloom, self.Difficulty, self.Number, self.GasLimit, self.GasUsed, self.Time, self.Extra}
+	fields := []interface{}{
+		self.ParentHash,
+		self.UncleHash,
+		self.Coinbase,
+		self.Root,
+		self.TxHash,
+		self.ReceiptHash,
+		self.Bloom,
+		self.Difficulty,
+		self.Number,
+		self.GasLimit,
+		self.GasUsed,
+		self.Time,
+		self.Extra}
 	if withNonce {
 		fields = append(fields, self.Nonce, self.MixDigest, self.SeedHash)
 	}
--- a/miner/agent.go
+++ b/miner/agent.go
@ -69,8 +69,8 @@ done:

 func (self *CpuMiner) mine(block *types.Block) {
 	minerlogger.Infof("(re)started agent[%d]. mining...\n", self.index)
-	nonce := self.pow.Search(block, self.quitCurrentOp)
+	nonce, mixDigest, seedHash := self.pow.Search(block, self.quitCurrentOp)
 	if nonce != nil {
-		self.returnCh <- Work{block.Number().Uint64(), nonce}
+		self.returnCh <- Work{block.Number().Uint64(), nonce, mixDigest, seedHash}
 	}
 }
--- a/miner/worker.go
+++ b/miner/worker.go
@ -42,8 +42,10 @@ func env(block *types.Block, eth core.Backend) *environment {
 }

 type Work struct {
-	Number uint64
-	Nonce  []byte
+	Number    uint64
+	Nonce     []byte
+	MixDigest []byte
+	SeedHash  []byte
 }

 type Agent interface {
@ -138,9 +140,12 @@ out:
 func (self *worker) wait() {
 	for {
 		for work := range self.recv {
+			// Someone Successfully Mined!
 			block := self.current.block
 			if block.Number().Uint64() == work.Number && block.Nonce() == nil {
 				self.current.block.Header().Nonce = work.Nonce
+				self.current.block.Header().MixDigest = work.MixDigest
+				self.current.block.Header().SeedHash = work.SeedHash

 				if err := self.chain.InsertChain(types.Blocks{self.current.block}); err == nil {
 					self.mux.Post(core.NewMinedBlockEvent{self.current.block})
--- a/pow/block.go
+++ b/pow/block.go
@ -1,12 +1,20 @@
 package pow

-import "math/big"
+import (
+	"github.com/ethereum/go-ethereum/core/types"
+	"math/big"
+)

 type Block interface {
 	Difficulty() *big.Int
 	HashNoNonce() []byte
 	Nonce() []byte
-	Number() *big.Int
 	MixDigest() []byte
 	SeedHash() []byte
+	NumberU64() uint64
+}
+
+type ChainManager interface {
+	GetBlockByNumber(uint64) *types.Block
+	CurrentBlock() *types.Block
 }
--- a/pow/dagger/dagger.go
+++ b/pow/dagger/dagger.go
@ -44,7 +44,7 @@ func (dag *Dagger) Find(obj *big.Int, resChan chan int64) {
 	resChan <- 0
 }

-func (dag *Dagger) Search(hash, diff *big.Int) *big.Int {
+func (dag *Dagger) Search(hash, diff *big.Int) ([]byte, []byte, []byte) {
 	// TODO fix multi threading. Somehow it results in the wrong nonce
 	amountOfRoutines := 1

@ -69,7 +69,7 @@ func (dag *Dagger) Search(hash, diff *big.Int) *big.Int {
 		}
 	}

-	return big.NewInt(res)
+	return big.NewInt(res).Bytes(), nil, nil
 }

 func (dag *Dagger) Verify(hash, diff, nonce *big.Int) bool {
--- a/pow/dash/crypto.c
+++ b/pow/dash/crypto.c
@ -1,5 +0,0 @@
-extern char *Sha3(char *, int);
-char *sha3_cgo(char *data, int l)
-{
-	return Sha3(data, l);
-}
--- a/pow/dash/crypto.go
+++ b/pow/dash/crypto.go
@ -1,14 +0,0 @@
-package dash
-
-/*
-char *sha3_cgo(char *, int); // Forward declaration
-*/
-import "C"
-import (
-	"github.com/ethereum/go-ethereum/crypto"
-)
-
-//export Sha3
-func Sha3(data []byte, l int) []byte {
-	return crypto.Sha3(data)
-}
--- a/pow/ezp/pow.go
+++ b/pow/ezp/pow.go
@ -32,7 +32,7 @@ func (pow *EasyPow) Turbo(on bool) {
 	pow.turbo = on
 }

-func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) []byte {
+func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
 	r := rand.New(rand.NewSource(time.Now().UnixNano()))
 	hash := block.HashNoNonce()
 	diff := block.Difficulty()
@ -57,7 +57,7 @@ empty:
 	for {
 		select {
 		case <-stop:
-			return nil
+			return nil, nil, nil
 		default:
 			i++

@ -67,7 +67,7 @@ empty:

 			sha := crypto.Sha3(big.NewInt(r.Int63()).Bytes())
 			if verify(hash, diff, sha) {
-				return sha
+				return sha, nil, nil
 			}
 		}

@ -75,8 +75,6 @@ empty:
 			time.Sleep(20 * time.Microsecond)
 		}
 	}
-
-	return nil
 }

 func (pow *EasyPow) Verify(block pow.Block) bool {
--- a/pow/pow.go
+++ b/pow/pow.go
@ -1,7 +1,7 @@
 package pow

 type PoW interface {
-	Search(block Block, stop <-chan struct{}) []byte
+	Search(block Block, stop <-chan struct{}) ([]byte, []byte, []byte)
 	Verify(block Block) bool
 	GetHashrate() int64
 	Turbo(bool)