all: Add GPU mining, disabled by default

2015-06-12 07:45:23 +02:00 · 2015-06-12 07:45:23 +02:00 · ec6a548ee3
commit ec6a548ee3
parent 8b865fa9bf
42 changed files with 6764 additions and 42 deletions
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@ -16,8 +16,8 @@
 		},
 		{
 			"ImportPath": "github.com/ethereum/ethash",
-			"Comment": "v23.1-234-g062e40a",
+			"Comment": "v23.1-235-gb39e007",
-			"Rev": "062e40a1a1671f5a5102862b56e4c56f68a732f5"
+			"Rev": "b39e007d393ab5945b4c0748a7415b7e31c5db04"
 		},
 		{
 			"ImportPath": "github.com/fatih/color",
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
@ -0,0 +1,26 @@
 /*
 Package cl provides a binding to the OpenCL api. It's mostly a low-level
 wrapper that avoids adding functionality while still making the interface
 a little more friendly and easy to use.
 Resource life-cycle management:
 For any CL object that gets created (buffer, queue, kernel, etc..) you should
 call object.Release() when finished with it to free the CL resources. This
 explicitely calls the related clXXXRelease method for the type. However,
 as a fallback there is a finalizer set for every resource item that takes
 care of it (eventually) if Release isn't called. In this way you can have
 better control over the life cycle of resources while having a fall back
 to avoid leaks. This is similar to how file handles and such are handled
 in the Go standard packages.
 */
 package cl
 // #include "headers/1.2/opencl.h"
 // #cgo CFLAGS: -Iheaders/1.2
 // #cgo darwin LDFLAGS: -framework OpenCL
 // #cgo linux LDFLAGS: -lOpenCL
 import "C"
 import "errors"
 var ErrUnsupported = errors.New("cl: unsupported")
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
@ -0,0 +1,254 @@
 package cl
 import (
 	"math/rand"
 	"reflect"
 	"strings"
 	"testing"
 )
 var kernelSource = `
 __kernel void square(
   __global float* input,
   __global float* output,
   const unsigned int count)
 {
   int i = get_global_id(0);
   if(i < count)
       output[i] = input[i] * input[i];
 }
 `
 func getObjectStrings(object interface{}) map[string]string {
 	v := reflect.ValueOf(object)
 	t := reflect.TypeOf(object)
 	strs := make(map[string]string)
 	numMethods := t.NumMethod()
 	for i := 0; i < numMethods; i++ {
 		method := t.Method(i)
 		if method.Type.NumIn() == 1 && method.Type.NumOut() == 1 && method.Type.Out(0).Kind() == reflect.String {
 			// this is a string-returning method with (presumably) only a pointer receiver parameter
 			// call it
 			outs := v.Method(i).Call([]reflect.Value{})
 			// put the result in our map
 			strs[method.Name] = (outs[0].Interface()).(string)
 		}
 	}
 	return strs
 }
 func TestPlatformStringsContainNoNULs(t *testing.T) {
 	platforms, err := GetPlatforms()
 	if err != nil {
 		t.Fatalf("Failed to get platforms: %+v", err)
 	}
 	for _, p := range platforms {
 		for key, value := range getObjectStrings(p) {
 			if strings.Contains(value, "\x00") {
 				t.Fatalf("platform string %q =  %+q contains NUL", key, value)
 			}
 		}
 	}
 }
 func TestDeviceStringsContainNoNULs(t *testing.T) {
 	platforms, err := GetPlatforms()
 	if err != nil {
 		t.Fatalf("Failed to get platforms: %+v", err)
 	}
 	for _, p := range platforms {
 		devs, err := p.GetDevices(DeviceTypeAll)
 		if err != nil {
 			t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err)
 		}
 		for _, d := range devs {
 			for key, value := range getObjectStrings(d) {
 				if strings.Contains(value, "\x00") {
 					t.Fatalf("device string %q =  %+q contains NUL", key, value)
 				}
 			}
 		}
 	}
 }
 func TestHello(t *testing.T) {
 	var data [1024]float32
 	for i := 0; i < len(data); i++ {
 		data[i] = rand.Float32()
 	}
 	platforms, err := GetPlatforms()
 	if err != nil {
 		t.Fatalf("Failed to get platforms: %+v", err)
 	}
 	for i, p := range platforms {
 		t.Logf("Platform %d:", i)
 		t.Logf("  Name: %s", p.Name())
 		t.Logf("  Vendor: %s", p.Vendor())
 		t.Logf("  Profile: %s", p.Profile())
 		t.Logf("  Version: %s", p.Version())
 		t.Logf("  Extensions: %s", p.Extensions())
 	}
 	platform := platforms[0]
 	devices, err := platform.GetDevices(DeviceTypeAll)
 	if err != nil {
 		t.Fatalf("Failed to get devices: %+v", err)
 	}
 	if len(devices) == 0 {
 		t.Fatalf("GetDevices returned no devices")
 	}
 	deviceIndex := -1
 	for i, d := range devices {
 		if deviceIndex < 0 && d.Type() == DeviceTypeGPU {
 			deviceIndex = i
 		}
 		t.Logf("Device %d (%s): %s", i, d.Type(), d.Name())
 		t.Logf("  Address Bits: %d", d.AddressBits())
 		t.Logf("  Available: %+v", d.Available())
 		// t.Logf("  Built-In Kernels: %s", d.BuiltInKernels())
 		t.Logf("  Compiler Available: %+v", d.CompilerAvailable())
 		t.Logf("  Double FP Config: %s", d.DoubleFPConfig())
 		t.Logf("  Driver Version: %s", d.DriverVersion())
 		t.Logf("  Error Correction Supported: %+v", d.ErrorCorrectionSupport())
 		t.Logf("  Execution Capabilities: %s", d.ExecutionCapabilities())
 		t.Logf("  Extensions: %s", d.Extensions())
 		t.Logf("  Global Memory Cache Type: %s", d.GlobalMemCacheType())
 		t.Logf("  Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024)
 		t.Logf("  Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024))
 		t.Logf("  Half FP Config: %s", d.HalfFPConfig())
 		t.Logf("  Host Unified Memory: %+v", d.HostUnifiedMemory())
 		t.Logf("  Image Support: %+v", d.ImageSupport())
 		t.Logf("  Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight())
 		t.Logf("  Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth())
 		// t.Logf("  Image Max Buffer Size: %d", d.ImageMaxBufferSize())
 		// t.Logf("  Image Max Array Size: %d", d.ImageMaxArraySize())
 		// t.Logf("  Linker Available: %+v", d.LinkerAvailable())
 		t.Logf("  Little Endian: %+v", d.EndianLittle())
 		t.Logf("  Local Mem Size Size: %d KB", d.LocalMemSize()/1024)
 		t.Logf("  Local Mem Type: %s", d.LocalMemType())
 		t.Logf("  Max Clock Frequency: %d", d.MaxClockFrequency())
 		t.Logf("  Max Compute Units: %d", d.MaxComputeUnits())
 		t.Logf("  Max Constant Args: %d", d.MaxConstantArgs())
 		t.Logf("  Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024)
 		t.Logf("  Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024)
 		t.Logf("  Max Parameter Size: %d", d.MaxParameterSize())
 		t.Logf("  Max Read-Image Args: %d", d.MaxReadImageArgs())
 		t.Logf("  Max Samplers: %d", d.MaxSamplers())
 		t.Logf("  Max Work Group Size: %d", d.MaxWorkGroupSize())
 		t.Logf("  Max Work Item Dimensions: %d", d.MaxWorkItemDimensions())
 		t.Logf("  Max Work Item Sizes: %d", d.MaxWorkItemSizes())
 		t.Logf("  Max Write-Image Args: %d", d.MaxWriteImageArgs())
 		t.Logf("  Memory Base Address Alignment: %d", d.MemBaseAddrAlign())
 		t.Logf("  Native Vector Width Char: %d", d.NativeVectorWidthChar())
 		t.Logf("  Native Vector Width Short: %d", d.NativeVectorWidthShort())
 		t.Logf("  Native Vector Width Int: %d", d.NativeVectorWidthInt())
 		t.Logf("  Native Vector Width Long: %d", d.NativeVectorWidthLong())
 		t.Logf("  Native Vector Width Float: %d", d.NativeVectorWidthFloat())
 		t.Logf("  Native Vector Width Double: %d", d.NativeVectorWidthDouble())
 		t.Logf("  Native Vector Width Half: %d", d.NativeVectorWidthHalf())
 		t.Logf("  OpenCL C Version: %s", d.OpenCLCVersion())
 		// t.Logf("  Parent Device: %+v", d.ParentDevice())
 		t.Logf("  Profile: %s", d.Profile())
 		t.Logf("  Profiling Timer Resolution: %d", d.ProfilingTimerResolution())
 		t.Logf("  Vendor: %s", d.Vendor())
 		t.Logf("  Version: %s", d.Version())
 	}
 	if deviceIndex < 0 {
 		deviceIndex = 0
 	}
 	device := devices[deviceIndex]
 	t.Logf("Using device %d", deviceIndex)
 	context, err := CreateContext([]*Device{device})
 	if err != nil {
 		t.Fatalf("CreateContext failed: %+v", err)
 	}
 	// imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
 	// if err != nil {
 	// 	t.Fatalf("GetSupportedImageFormats failed: %+v", err)
 	// }
 	// t.Logf("Supported image formats: %+v", imageFormats)
 	queue, err := context.CreateCommandQueue(device, 0)
 	if err != nil {
 		t.Fatalf("CreateCommandQueue failed: %+v", err)
 	}
 	program, err := context.CreateProgramWithSource([]string{kernelSource})
 	if err != nil {
 		t.Fatalf("CreateProgramWithSource failed: %+v", err)
 	}
 	if err := program.BuildProgram(nil, ""); err != nil {
 		t.Fatalf("BuildProgram failed: %+v", err)
 	}
 	kernel, err := program.CreateKernel("square")
 	if err != nil {
 		t.Fatalf("CreateKernel failed: %+v", err)
 	}
 	for i := 0; i < 3; i++ {
 		name, err := kernel.ArgName(i)
 		if err == ErrUnsupported {
 			break
 		} else if err != nil {
 			t.Errorf("GetKernelArgInfo for name failed: %+v", err)
 			break
 		} else {
 			t.Logf("Kernel arg %d: %s", i, name)
 		}
 	}
 	input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
 	if err != nil {
 		t.Fatalf("CreateBuffer failed for input: %+v", err)
 	}
 	output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
 	if err != nil {
 		t.Fatalf("CreateBuffer failed for output: %+v", err)
 	}
 	if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil {
 		t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err)
 	}
 	if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil {
 		t.Fatalf("SetKernelArgs failed: %+v", err)
 	}
 	local, err := kernel.WorkGroupSize(device)
 	if err != nil {
 		t.Fatalf("WorkGroupSize failed: %+v", err)
 	}
 	t.Logf("Work group size: %d", local)
 	size, _ := kernel.PreferredWorkGroupSizeMultiple(nil)
 	t.Logf("Preferred Work Group Size Multiple: %d", size)
 	global := len(data)
 	d := len(data) % local
 	if d != 0 {
 		global += local - d
 	}
 	if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil {
 		t.Fatalf("EnqueueNDRangeKernel failed: %+v", err)
 	}
 	if err := queue.Finish(); err != nil {
 		t.Fatalf("Finish failed: %+v", err)
 	}
 	results := make([]float32, len(data))
 	if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil {
 		t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err)
 	}
 	correct := 0
 	for i, v := range data {
 		if results[i] == v*v {
 			correct++
 		}
 	}
 	if correct != len(data) {
 		t.Fatalf("%d/%d correct values", correct, len(data))
 	}
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
@ -0,0 +1,161 @@
 package cl
 // #include <stdlib.h>
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import (
 	"runtime"
 	"unsafe"
 )
 const maxImageFormats = 256
 type Context struct {
 	clContext C.cl_context
 	devices   []*Device
 }
 type MemObject struct {
 	clMem C.cl_mem
 	size  int
 }
 func releaseContext(c *Context) {
 	if c.clContext != nil {
 		C.clReleaseContext(c.clContext)
 		c.clContext = nil
 	}
 }
 func releaseMemObject(b *MemObject) {
 	if b.clMem != nil {
 		C.clReleaseMemObject(b.clMem)
 		b.clMem = nil
 	}
 }
 func newMemObject(mo C.cl_mem, size int) *MemObject {
 	memObject := &MemObject{clMem: mo, size: size}
 	runtime.SetFinalizer(memObject, releaseMemObject)
 	return memObject
 }
 func (b *MemObject) Release() {
 	releaseMemObject(b)
 }
 // TODO: properties
 func CreateContext(devices []*Device) (*Context, error) {
 	deviceIds := buildDeviceIdList(devices)
 	var err C.cl_int
 	clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	if clContext == nil {
 		return nil, ErrUnknown
 	}
 	context := &Context{clContext: clContext, devices: devices}
 	runtime.SetFinalizer(context, releaseContext)
 	return context, nil
 }
 func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) {
 	var formats [maxImageFormats]C.cl_image_format
 	var nFormats C.cl_uint
 	if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	fmts := make([]ImageFormat, nFormats)
 	for i, f := range formats[:nFormats] {
 		fmts[i] = ImageFormat{
 			ChannelOrder:    ChannelOrder(f.image_channel_order),
 			ChannelDataType: ChannelDataType(f.image_channel_data_type),
 		}
 	}
 	return fmts, nil
 }
 func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) {
 	var err C.cl_int
 	clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	if clQueue == nil {
 		return nil, ErrUnknown
 	}
 	commandQueue := &CommandQueue{clQueue: clQueue, device: device}
 	runtime.SetFinalizer(commandQueue, releaseCommandQueue)
 	return commandQueue, nil
 }
 func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) {
 	cSources := make([]*C.char, len(sources))
 	for i, s := range sources {
 		cs := C.CString(s)
 		cSources[i] = cs
 		defer C.free(unsafe.Pointer(cs))
 	}
 	var err C.cl_int
 	clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	if clProgram == nil {
 		return nil, ErrUnknown
 	}
 	program := &Program{clProgram: clProgram, devices: ctx.devices}
 	runtime.SetFinalizer(program, releaseProgram)
 	return program, nil
 }
 func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) {
 	var err C.cl_int
 	clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	if clBuffer == nil {
 		return nil, ErrUnknown
 	}
 	return newMemObject(clBuffer, size), nil
 }
 func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) {
 	return ctx.CreateBufferUnsafe(flags, size, nil)
 }
 func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) {
 	return ctx.CreateBufferUnsafe(flags, 4*size, nil)
 }
 func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) {
 	return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0]))
 }
 //float64
 func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) {
 	return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0]))
 }
 func (ctx *Context) CreateUserEvent() (*Event, error) {
 	var err C.cl_int
 	clEvent := C.clCreateUserEvent(ctx.clContext, &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	return newEvent(clEvent), nil
 }
 func (ctx *Context) Release() {
 	releaseContext(ctx)
 }
 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
 // func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
@ -0,0 +1,510 @@
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #include "cl_ext.h"
 // #endif
 import "C"
 import (
 	"strings"
 	"unsafe"
 )
 const maxDeviceCount = 64
 type DeviceType uint
 const (
 	DeviceTypeCPU         DeviceType = C.CL_DEVICE_TYPE_CPU
 	DeviceTypeGPU         DeviceType = C.CL_DEVICE_TYPE_GPU
 	DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR
 	DeviceTypeDefault     DeviceType = C.CL_DEVICE_TYPE_DEFAULT
 	DeviceTypeAll         DeviceType = C.CL_DEVICE_TYPE_ALL
 )
 type FPConfig int
 const (
 	FPConfigDenorm         FPConfig = C.CL_FP_DENORM           // denorms are supported
 	FPConfigInfNaN         FPConfig = C.CL_FP_INF_NAN          // INF and NaNs are supported
 	FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
 	FPConfigRoundToZero    FPConfig = C.CL_FP_ROUND_TO_ZERO    // round to zero rounding mode supported
 	FPConfigRoundToInf     FPConfig = C.CL_FP_ROUND_TO_INF     // round to positive and negative infinity rounding modes supported
 	FPConfigFMA            FPConfig = C.CL_FP_FMA              // IEEE754-2008 fused multiply-add is supported
 	FPConfigSoftFloat      FPConfig = C.CL_FP_SOFT_FLOAT       // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
 )
 var fpConfigNameMap = map[FPConfig]string{
 	FPConfigDenorm:         "Denorm",
 	FPConfigInfNaN:         "InfNaN",
 	FPConfigRoundToNearest: "RoundToNearest",
 	FPConfigRoundToZero:    "RoundToZero",
 	FPConfigRoundToInf:     "RoundToInf",
 	FPConfigFMA:            "FMA",
 	FPConfigSoftFloat:      "SoftFloat",
 }
 func (c FPConfig) String() string {
 	var parts []string
 	for bit, name := range fpConfigNameMap {
 		if c&bit != 0 {
 			parts = append(parts, name)
 		}
 	}
 	if parts == nil {
 		return ""
 	}
 	return strings.Join(parts, "|")
 }
 func (dt DeviceType) String() string {
 	var parts []string
 	if dt&DeviceTypeCPU != 0 {
 		parts = append(parts, "CPU")
 	}
 	if dt&DeviceTypeGPU != 0 {
 		parts = append(parts, "GPU")
 	}
 	if dt&DeviceTypeAccelerator != 0 {
 		parts = append(parts, "Accelerator")
 	}
 	if dt&DeviceTypeDefault != 0 {
 		parts = append(parts, "Default")
 	}
 	if parts == nil {
 		parts = append(parts, "None")
 	}
 	return strings.Join(parts, "|")
 }
 type Device struct {
 	id C.cl_device_id
 }
 func buildDeviceIdList(devices []*Device) []C.cl_device_id {
 	deviceIds := make([]C.cl_device_id, len(devices))
 	for i, d := range devices {
 		deviceIds[i] = d.id
 	}
 	return deviceIds
 }
 // Obtain the list of devices available on a platform. 'platform' refers
 // to the platform returned by GetPlatforms or can be nil. If platform
 // is nil, the behavior is implementation-defined.
 func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) {
 	var deviceIds [maxDeviceCount]C.cl_device_id
 	var numDevices C.cl_uint
 	var platformId C.cl_platform_id
 	if platform != nil {
 		platformId = platform.id
 	}
 	if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	if numDevices > maxDeviceCount {
 		numDevices = maxDeviceCount
 	}
 	devices := make([]*Device, numDevices)
 	for i := 0; i < int(numDevices); i++ {
 		devices[i] = &Device{id: deviceIds[i]}
 	}
 	return devices, nil
 }
 func (d *Device) nullableId() C.cl_device_id {
 	if d == nil {
 		return nil
 	}
 	return d.id
 }
 func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) {
 	var strC [1024]C.char
 	var strN C.size_t
 	if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS {
 		if panicOnError {
 			panic("Should never fail")
 		}
 		return "", toError(err)
 	}
 	// OpenCL strings are NUL-terminated, and the terminator is included in strN
 	// Go strings aren't NUL-terminated, so subtract 1 from the length
 	return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil
 }
 func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) {
 	var val C.cl_uint
 	if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
 		if panicOnError {
 			panic("Should never fail")
 		}
 		return 0, toError(err)
 	}
 	return uint(val), nil
 }
 func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) {
 	var val C.size_t
 	if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
 		if panicOnError {
 			panic("Should never fail")
 		}
 		return 0, toError(err)
 	}
 	return int(val), nil
 }
 func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) {
 	var val C.cl_ulong
 	if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
 		if panicOnError {
 			panic("Should never fail")
 		}
 		return 0, toError(err)
 	}
 	return int64(val), nil
 }
 func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) {
 	var val C.cl_bool
 	if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
 		if panicOnError {
 			panic("Should never fail")
 		}
 		return false, toError(err)
 	}
 	return val == C.CL_TRUE, nil
 }
 func (d *Device) Name() string {
 	str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true)
 	return str
 }
 func (d *Device) Vendor() string {
 	str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true)
 	return str
 }
 func (d *Device) Extensions() string {
 	str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true)
 	return str
 }
 func (d *Device) OpenCLCVersion() string {
 	str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true)
 	return str
 }
 func (d *Device) Profile() string {
 	str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true)
 	return str
 }
 func (d *Device) Version() string {
 	str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true)
 	return str
 }
 func (d *Device) DriverVersion() string {
 	str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true)
 	return str
 }
 // The default compute device address space size specified as an
 // unsigned integer value in bits. Currently supported values are 32 or 64 bits.
 func (d *Device) AddressBits() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true)
 	return int(val)
 }
 // Size of global memory cache line in bytes.
 func (d *Device) GlobalMemCachelineSize() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true)
 	return int(val)
 }
 // Maximum configured clock frequency of the device in MHz.
 func (d *Device) MaxClockFrequency() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true)
 	return int(val)
 }
 // The number of parallel compute units on the OpenCL device.
 // A work-group executes on a single compute unit. The minimum value is 1.
 func (d *Device) MaxComputeUnits() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true)
 	return int(val)
 }
 // Max number of arguments declared with the __constant qualifier in a kernel.
 // The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) MaxConstantArgs() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true)
 	return int(val)
 }
 // Max number of simultaneous image objects that can be read by a kernel.
 // The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) MaxReadImageArgs() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true)
 	return int(val)
 }
 // Maximum number of samplers that can be used in a kernel. The minimum
 // value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
 func (d *Device) MaxSamplers() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true)
 	return int(val)
 }
 // Maximum dimensions that specify the global and local work-item IDs used
 // by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
 // The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) MaxWorkItemDimensions() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true)
 	return int(val)
 }
 // Max number of simultaneous image objects that can be written to by a
 // kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) MaxWriteImageArgs() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true)
 	return int(val)
 }
 // The minimum value is the size (in bits) of the largest OpenCL built-in
 // data type supported by the device (long16 in FULL profile, long16 or
 // int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) MemBaseAddrAlign() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthChar() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthShort() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthInt() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthLong() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthFloat() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthDouble() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true)
 	return int(val)
 }
 func (d *Device) NativeVectorWidthHalf() int {
 	val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true)
 	return int(val)
 }
 // Max height of 2D image in pixels. The minimum value is 8192
 // if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) Image2DMaxHeight() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true)
 	return int(val)
 }
 // Max width of 2D image or 1D image not created from a buffer object in
 // pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) Image2DMaxWidth() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true)
 	return int(val)
 }
 // Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) Image3DMaxDepth() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true)
 	return int(val)
 }
 // Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) Image3DMaxHeight() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true)
 	return int(val)
 }
 // Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) Image3DMaxWidth() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true)
 	return int(val)
 }
 // Max size in bytes of the arguments that can be passed to a kernel. The
 // minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 // For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
 func (d *Device) MaxParameterSize() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true)
 	return int(val)
 }
 // Maximum number of work-items in a work-group executing a kernel on a
 // single compute unit, using the data parallel execution model. (Refer
 // to clEnqueueNDRangeKernel). The minimum value is 1.
 func (d *Device) MaxWorkGroupSize() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true)
 	return int(val)
 }
 // Describes the resolution of device timer. This is measured in nanoseconds.
 func (d *Device) ProfilingTimerResolution() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true)
 	return int(val)
 }
 // Size of local memory arena in bytes. The minimum value is 32 KB for
 // devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) LocalMemSize() int64 {
 	val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true)
 	return val
 }
 // Max size in bytes of a constant buffer allocation. The minimum value is
 // 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) MaxConstantBufferSize() int64 {
 	val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true)
 	return val
 }
 // Max size of memory object allocation in bytes. The minimum value is max
 // (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
 // not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) MaxMemAllocSize() int64 {
 	val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true)
 	return val
 }
 // Size of global device memory in bytes.
 func (d *Device) GlobalMemSize() int64 {
 	val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true)
 	return val
 }
 func (d *Device) Available() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true)
 	return val
 }
 func (d *Device) CompilerAvailable() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true)
 	return val
 }
 func (d *Device) EndianLittle() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true)
 	return val
 }
 // Is CL_TRUE if the device implements error correction for all
 // accesses to compute device memory (global and constant). Is
 // CL_FALSE if the device does not implement such error correction.
 func (d *Device) ErrorCorrectionSupport() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true)
 	return val
 }
 func (d *Device) HostUnifiedMemory() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true)
 	return val
 }
 func (d *Device) ImageSupport() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true)
 	return val
 }
 func (d *Device) Type() DeviceType {
 	var deviceType C.cl_device_type
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS {
 		panic("Failed to get device type")
 	}
 	return DeviceType(deviceType)
 }
 // Describes double precision floating-point capability of the OpenCL device
 func (d *Device) DoubleFPConfig() FPConfig {
 	var fpConfig C.cl_device_fp_config
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS {
 		panic("Failed to get double FP config")
 	}
 	return FPConfig(fpConfig)
 }
 // Describes the OPTIONAL half precision floating-point capability of the OpenCL device
 func (d *Device) HalfFPConfig() FPConfig {
 	var fpConfig C.cl_device_fp_config
 	err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil)
 	if err != C.CL_SUCCESS {
 		return FPConfig(0)
 	}
 	return FPConfig(fpConfig)
 }
 // Type of local memory supported. This can be set to CL_LOCAL implying dedicated
 // local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
 // can also be returned indicating no local memory support.
 func (d *Device) LocalMemType() LocalMemType {
 	var memType C.cl_device_local_mem_type
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
 		return LocalMemType(C.CL_NONE)
 	}
 	return LocalMemType(memType)
 }
 // Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
 func (d *Device) ExecutionCapabilities() ExecCapability {
 	var execCap C.cl_device_exec_capabilities
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS {
 		panic("Failed to get execution capabilities")
 	}
 	return ExecCapability(execCap)
 }
 func (d *Device) GlobalMemCacheType() MemCacheType {
 	var memType C.cl_device_mem_cache_type
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
 		return MemCacheType(C.CL_NONE)
 	}
 	return MemCacheType(memType)
 }
 // Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
 //
 // Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
 //
 // The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
 func (d *Device) MaxWorkItemSizes() []int {
 	dims := d.MaxWorkItemDimensions()
 	sizes := make([]C.size_t, dims)
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS {
 		panic("Failed to get max work item sizes")
 	}
 	intSizes := make([]int, dims)
 	for i, s := range sizes {
 		intSizes[i] = int(s)
 	}
 	return intSizes
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
@ -0,0 +1,51 @@
 // +build cl12
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import "unsafe"
 const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
 func init() {
 	fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt"
 }
 func (d *Device) BuiltInKernels() string {
 	str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true)
 	return str
 }
 // Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
 func (d *Device) LinkerAvailable() bool {
 	val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true)
 	return val
 }
 func (d *Device) ParentDevice() *Device {
 	var deviceId C.cl_device_id
 	if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS {
 		panic("ParentDevice failed")
 	}
 	if deviceId == nil {
 		return nil
 	}
 	return &Device{id: deviceId}
 }
 // Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
 func (d *Device) ImageMaxBufferSize() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true)
 	return int(val)
 }
 // Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
 func (d *Device) ImageMaxArraySize() int {
 	val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true)
 	return int(val)
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
@ -0,0 +1,315 @@
 /*******************************************************************************
 * Copyright (c) 2008-2013 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/
 /* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
 /* cl_ext.h contains OpenCL extensions which don't have external */
 /* (OpenGL, D3D) dependencies.                                   */
 #ifndef __CL_EXT_H
 #define __CL_EXT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 #ifdef __APPLE__
 #include <AvailabilityMacros.h>
 #endif
 #include <cl.h>
 /* cl_khr_fp16 extension - no extension #define since it has no functions  */
 #define CL_DEVICE_HALF_FP_CONFIG                    0x1033
 /* Memory object destruction
 *
 * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
 *
 * Registers a user callback function that will be called when the memory object is deleted and its resources 
 * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback 
 * stack associated with memobj. The registered user callback functions are called in the reverse order in 
 * which they were registered. The user callback functions are called and then the memory object is deleted 
 * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be 
 * notified when the memory referenced by host_ptr, specified when the memory object is created and used as 
 * the storage bits for the memory object, can be reused or freed.
 *
 * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
 *
 * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 * before using.
 */
 #define cl_APPLE_SetMemObjectDestructor 1
 cl_int  CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem /* memobj */, 
                                        void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
                                        void * /*user_data */ )             CL_EXT_SUFFIX__VERSION_1_0;  
 /* Context Logging Functions
 *
 * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
 * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 * before using.
 *
 * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger 
 */
 #define cl_APPLE_ContextLoggingFunctions 1
 extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(  const char * /* errstr */, 
                                            const void * /* private_info */, 
                                            size_t       /* cb */, 
                                            void *       /* user_data */ )  CL_EXT_SUFFIX__VERSION_1_0;
 /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
 extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(   const char * /* errstr */, 
                                          const void * /* private_info */, 
                                          size_t       /* cb */, 
                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
 /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
 extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * /* errstr */, 
                                          const void * /* private_info */, 
                                          size_t       /* cb */, 
                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
 /************************ 
 * cl_khr_icd extension *                                                  
 ************************/
 #define cl_khr_icd 1
 /* cl_platform_info                                                        */
 #define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920
 /* Additional Error Codes                                                  */
 #define CL_PLATFORM_NOT_FOUND_KHR                   -1001
 extern CL_API_ENTRY cl_int CL_API_CALL
 clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
                       cl_platform_id * /* platforms */,
                       cl_uint *        /* num_platforms */);
 typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
    cl_uint          /* num_entries */,
    cl_platform_id * /* platforms */,
    cl_uint *        /* num_platforms */);
 /* Extension: cl_khr_image2D_buffer
 *
 * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
 * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
 * Both the sampler and sampler-less read_image built-in functions are supported for 2D images
 * and 2D images created from a buffer.  Similarly, the write_image built-ins are also supported
 * for 2D images created from a buffer.
 *
 * When the 2D image from buffer is created, the client must specify the width,
 * height, image format (i.e. channel order and channel data type) and optionally the row pitch
 *
 * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
 * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
 */
 /*************************************
 * cl_khr_initalize_memory extension *
 *************************************/
 #define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x200E
 /**************************************
 * cl_khr_terminate_context extension *
 **************************************/
 #define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x200F
 #define CL_CONTEXT_TERMINATE_KHR                    0x2010
 #define cl_khr_terminate_context 1
 extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
 typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
 /*
 * Extension: cl_khr_spir
 *
 * This extension adds support to create an OpenCL program object from a 
 * Standard Portable Intermediate Representation (SPIR) instance
 */
 #define CL_DEVICE_SPIR_VERSIONS                     0x40E0
 #define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE         0x40E1
 /******************************************
 * cl_nv_device_attribute_query extension *
 ******************************************/
 /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
 #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
 #define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
 #define CL_DEVICE_WARP_SIZE_NV                      0x4003
 #define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
 #define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
 /*********************************
 * cl_amd_device_attribute_query *
 *********************************/
 #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
 /*********************************
 * cl_arm_printf extension
 *********************************/
 #define CL_PRINTF_CALLBACK_ARM                      0x40B0
 #define CL_PRINTF_BUFFERSIZE_ARM                    0x40B1
 #ifdef CL_VERSION_1_1
   /***********************************
    * cl_ext_device_fission extension *
    ***********************************/
    #define cl_ext_device_fission   1
    extern CL_API_ENTRY cl_int CL_API_CALL
    clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
    typedef CL_API_ENTRY cl_int 
    (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
    extern CL_API_ENTRY cl_int CL_API_CALL
    clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
    typedef CL_API_ENTRY cl_int 
    (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
    typedef cl_ulong  cl_device_partition_property_ext;
    extern CL_API_ENTRY cl_int CL_API_CALL
    clCreateSubDevicesEXT(  cl_device_id /*in_device*/,
                            const cl_device_partition_property_ext * /* properties */,
                            cl_uint /*num_entries*/,
                            cl_device_id * /*out_devices*/,
                            cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
    typedef CL_API_ENTRY cl_int 
    ( CL_API_CALL * clCreateSubDevicesEXT_fn)(  cl_device_id /*in_device*/,
                                                const cl_device_partition_property_ext * /* properties */,
                                                cl_uint /*num_entries*/,
                                                cl_device_id * /*out_devices*/,
                                                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
    /* cl_device_partition_property_ext */
    #define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
    #define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
    #define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
    #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
    /* clDeviceGetInfo selectors */
    #define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
    #define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
    #define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
    #define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
    #define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
    /* error codes */
    #define CL_DEVICE_PARTITION_FAILED_EXT              -1057
    #define CL_INVALID_PARTITION_COUNT_EXT              -1058
    #define CL_INVALID_PARTITION_NAME_EXT               -1059
    /* CL_AFFINITY_DOMAINs */
    #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
    #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
    #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
    #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
    #define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
    #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
    /* cl_device_partition_property_ext list terminators */
    #define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
    #define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
    #define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
 /*********************************
 * cl_qcom_ext_host_ptr extension
 *********************************/
 #define CL_MEM_EXT_HOST_PTR_QCOM                  (1 << 29)
 #define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM   0x40A0      
 #define CL_DEVICE_PAGE_SIZE_QCOM                  0x40A1
 #define CL_IMAGE_ROW_ALIGNMENT_QCOM               0x40A2
 #define CL_IMAGE_SLICE_ALIGNMENT_QCOM             0x40A3
 #define CL_MEM_HOST_UNCACHED_QCOM                 0x40A4
 #define CL_MEM_HOST_WRITEBACK_QCOM                0x40A5
 #define CL_MEM_HOST_WRITETHROUGH_QCOM             0x40A6
 #define CL_MEM_HOST_WRITE_COMBINING_QCOM          0x40A7
 typedef cl_uint                                   cl_image_pitch_info_qcom;
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetDeviceImageInfoQCOM(cl_device_id             device,
                         size_t                   image_width,
                         size_t                   image_height,
                         const cl_image_format   *image_format,
                         cl_image_pitch_info_qcom param_name,
                         size_t                   param_value_size,
                         void                    *param_value,
                         size_t                  *param_value_size_ret);
 typedef struct _cl_mem_ext_host_ptr
 {
    /* Type of external memory allocation. */
    /* Legal values will be defined in layered extensions. */
    cl_uint  allocation_type;
 	/* Host cache policy for this external memory allocation. */
    cl_uint  host_cache_policy;
 } cl_mem_ext_host_ptr;
 /*********************************
 * cl_qcom_ion_host_ptr extension
 *********************************/
 #define CL_MEM_ION_HOST_PTR_QCOM                  0x40A8
 typedef struct _cl_mem_ion_host_ptr
 {
    /* Type of external memory allocation. */
    /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
    cl_mem_ext_host_ptr  ext_host_ptr;
    /* ION file descriptor */
    int                  ion_filedesc;
    /* Host pointer to the ION allocated memory */
    void*                ion_hostptr;
 } cl_mem_ion_host_ptr;
 #endif /* CL_VERSION_1_1 */
 #ifdef __cplusplus
 }
 #endif
 #endif /* __CL_EXT_H */
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
@ -0,0 +1,158 @@
 /**********************************************************************************
 * Copyright (c) 2008 - 2012 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/
 #ifndef __OPENCL_CL_GL_H
 #define __OPENCL_CL_GL_H
 #include <cl.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 typedef cl_uint     cl_gl_object_type;
 typedef cl_uint     cl_gl_texture_info;
 typedef cl_uint     cl_gl_platform_info;
 typedef struct __GLsync *cl_GLsync;
 /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
 #define CL_GL_OBJECT_BUFFER                     0x2000
 #define CL_GL_OBJECT_TEXTURE2D                  0x2001
 #define CL_GL_OBJECT_TEXTURE3D                  0x2002
 #define CL_GL_OBJECT_RENDERBUFFER               0x2003
 #define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
 #define CL_GL_OBJECT_TEXTURE1D                  0x200F
 #define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
 #define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
 /* cl_gl_texture_info           */
 #define CL_GL_TEXTURE_TARGET                    0x2004
 #define CL_GL_MIPMAP_LEVEL                      0x2005
 #define CL_GL_NUM_SAMPLES                       0x2012
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateFromGLBuffer(cl_context     /* context */,
                     cl_mem_flags   /* flags */,
                     cl_GLuint      /* bufobj */,
                     int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateFromGLTexture(cl_context      /* context */,
                      cl_mem_flags    /* flags */,
                      cl_GLenum       /* target */,
                      cl_GLint        /* miplevel */,
                      cl_GLuint       /* texture */,
                      cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateFromGLRenderbuffer(cl_context   /* context */,
                           cl_mem_flags /* flags */,
                           cl_GLuint    /* renderbuffer */,
                           cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetGLObjectInfo(cl_mem                /* memobj */,
                  cl_gl_object_type *   /* gl_object_type */,
                  cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetGLTextureInfo(cl_mem               /* memobj */,
                   cl_gl_texture_info   /* param_name */,
                   size_t               /* param_value_size */,
                   void *               /* param_value */,
                   size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
                          cl_uint               /* num_objects */,
                          const cl_mem *        /* mem_objects */,
                          cl_uint               /* num_events_in_wait_list */,
                          const cl_event *      /* event_wait_list */,
                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
                          cl_uint               /* num_objects */,
                          const cl_mem *        /* mem_objects */,
                          cl_uint               /* num_events_in_wait_list */,
                          const cl_event *      /* event_wait_list */,
                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 /* Deprecated OpenCL 1.1 APIs */
 extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
 clCreateFromGLTexture2D(cl_context      /* context */,
                        cl_mem_flags    /* flags */,
                        cl_GLenum       /* target */,
                        cl_GLint        /* miplevel */,
                        cl_GLuint       /* texture */,
                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
 clCreateFromGLTexture3D(cl_context      /* context */,
                        cl_mem_flags    /* flags */,
                        cl_GLenum       /* target */,
                        cl_GLint        /* miplevel */,
                        cl_GLuint       /* texture */,
                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 /* cl_khr_gl_sharing extension  */
 #define cl_khr_gl_sharing 1
 typedef cl_uint     cl_gl_context_info;
 /* Additional Error Codes  */
 #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
 /* cl_gl_context_info  */
 #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
 #define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
 /* Additional cl_context_properties  */
 #define CL_GL_CONTEXT_KHR                       0x2008
 #define CL_EGL_DISPLAY_KHR                      0x2009
 #define CL_GLX_DISPLAY_KHR                      0x200A
 #define CL_WGL_HDC_KHR                          0x200B
 #define CL_CGL_SHAREGROUP_KHR                   0x200C
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
                      cl_gl_context_info            /* param_name */,
                      size_t                        /* param_value_size */,
                      void *                        /* param_value */,
                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
    const cl_context_properties * properties,
    cl_gl_context_info            param_name,
    size_t                        param_value_size,
    void *                        param_value,
    size_t *                      param_value_size_ret);
 #ifdef __cplusplus
 }
 #endif
 #endif  /* __OPENCL_CL_GL_H */
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
@ -0,0 +1,65 @@
 /**********************************************************************************
 * Copyright (c) 2008-2012 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/
 /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
 /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have           */
 /* OpenGL dependencies.                                                         */
 #ifndef __OPENCL_CL_GL_EXT_H
 #define __OPENCL_CL_GL_EXT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <cl_gl.h>
 /*
 * For each extension, follow this template
 *  cl_VEN_extname extension  */
 /* #define cl_VEN_extname 1
 * ... define new types, if any
 * ... define new tokens, if any
 * ... define new APIs, if any
 *
 *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
 *  This allows us to avoid having to decide whether to include GL headers or GLES here.
 */
 /* 
 *  cl_khr_gl_event  extension
 *  See section 9.9 in the OpenCL 1.1 spec for more information
 */
 #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
 extern CL_API_ENTRY cl_event CL_API_CALL
 clCreateEventFromGLsyncKHR(cl_context           /* context */,
                           cl_GLsync            /* cl_GLsync */,
                           cl_int *             /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
 #ifdef __cplusplus
 }
 #endif
 #endif	/* __OPENCL_CL_GL_EXT_H  */
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
@ -0,0 +1,43 @@
 /*******************************************************************************
 * Copyright (c) 2008-2012 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/
 /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
 #ifndef __OPENCL_H
 #define __OPENCL_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <cl.h>
 #include <cl_gl.h>
 #include <cl_gl_ext.h>
 #include <cl_ext.h>
 #ifdef __cplusplus
 }
 #endif
 #endif  /* __OPENCL_H   */
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
@ -0,0 +1,83 @@
 // +build cl12
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import (
 	"image"
 	"unsafe"
 )
 func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) {
 	format := imageFormat.toCl()
 	desc := imageDesc.toCl()
 	var dataPtr unsafe.Pointer
 	if data != nil {
 		dataPtr = unsafe.Pointer(&data[0])
 	}
 	var err C.cl_int
 	clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	if clBuffer == nil {
 		return nil, ErrUnknown
 	}
 	return newMemObject(clBuffer, len(data)), nil
 }
 func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) {
 	format := ImageFormat{channelOrder, channelDataType}
 	desc := ImageDescription{
 		Type:   MemObjectTypeImage2D,
 		Width:  width,
 		Height: height,
 	}
 	return ctx.CreateImage(flags, format, desc, data)
 }
 func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) {
 	switch m := img.(type) {
 	case *image.Gray:
 		format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8}
 		desc := ImageDescription{
 			Type:     MemObjectTypeImage2D,
 			Width:    m.Bounds().Dx(),
 			Height:   m.Bounds().Dy(),
 			RowPitch: m.Stride,
 		}
 		return ctx.CreateImage(flags, format, desc, m.Pix)
 	case *image.RGBA:
 		format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8}
 		desc := ImageDescription{
 			Type:     MemObjectTypeImage2D,
 			Width:    m.Bounds().Dx(),
 			Height:   m.Bounds().Dy(),
 			RowPitch: m.Stride,
 		}
 		return ctx.CreateImage(flags, format, desc, m.Pix)
 	}
 	b := img.Bounds()
 	w := b.Dx()
 	h := b.Dy()
 	data := make([]byte, w*h*4)
 	dataOffset := 0
 	for y := 0; y < h; y++ {
 		for x := 0; x < w; x++ {
 			c := img.At(x+b.Min.X, y+b.Min.Y)
 			r, g, b, a := c.RGBA()
 			data[dataOffset] = uint8(r >> 8)
 			data[dataOffset+1] = uint8(g >> 8)
 			data[dataOffset+2] = uint8(b >> 8)
 			data[dataOffset+3] = uint8(a >> 8)
 			dataOffset += 4
 		}
 	}
 	return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data)
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
@ -0,0 +1,127 @@
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import (
 	"fmt"
 	"unsafe"
 )
 type ErrUnsupportedArgumentType struct {
 	Index int
 	Value interface{}
 }
 func (e ErrUnsupportedArgumentType) Error() string {
 	return fmt.Sprintf("cl: unsupported argument type for index %d: %+v", e.Index, e.Value)
 }
 type Kernel struct {
 	clKernel C.cl_kernel
 	name     string
 }
 type LocalBuffer int
 func releaseKernel(k *Kernel) {
 	if k.clKernel != nil {
 		C.clReleaseKernel(k.clKernel)
 		k.clKernel = nil
 	}
 }
 func (k *Kernel) Release() {
 	releaseKernel(k)
 }
 func (k *Kernel) SetArgs(args ...interface{}) error {
 	for index, arg := range args {
 		if err := k.SetArg(index, arg); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 func (k *Kernel) SetArg(index int, arg interface{}) error {
 	switch val := arg.(type) {
 	case uint8:
 		return k.SetArgUint8(index, val)
 	case int8:
 		return k.SetArgInt8(index, val)
 	case uint32:
 		return k.SetArgUint32(index, val)
 	case uint64:
 		return k.SetArgUint64(index, val)
 	case int32:
 		return k.SetArgInt32(index, val)
 	case float32:
 		return k.SetArgFloat32(index, val)
 	case *MemObject:
 		return k.SetArgBuffer(index, val)
 	case LocalBuffer:
 		return k.SetArgLocal(index, int(val))
 	default:
 		return ErrUnsupportedArgumentType{Index: index, Value: arg}
 	}
 }
 func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem))
 }
 func (k *Kernel) SetArgFloat32(index int, val float32) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
 }
 func (k *Kernel) SetArgInt8(index int, val int8) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
 }
 func (k *Kernel) SetArgUint8(index int, val uint8) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
 }
 func (k *Kernel) SetArgInt32(index int, val int32) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
 }
 func (k *Kernel) SetArgUint32(index int, val uint32) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
 }
 func (k *Kernel) SetArgUint64(index int, val uint64) error {
 	return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
 }
 func (k *Kernel) SetArgLocal(index int, size int) error {
 	return k.SetArgUnsafe(index, size, nil)
 }
 func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error {
 	//fmt.Println("FUNKY: ", index, argSize)
 	return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg))
 }
 func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) {
 	var size C.size_t
 	err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
 	return int(size), toError(err)
 }
 func (k *Kernel) WorkGroupSize(device *Device) (int, error) {
 	var size C.size_t
 	err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
 	return int(size), toError(err)
 }
 func (k *Kernel) NumArgs() (int, error) {
 	var num C.cl_uint
 	err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil)
 	return int(num), toError(err)
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
@ -0,0 +1,7 @@
 // +build !cl12
 package cl
 func (k *Kernel) ArgName(index int) (string, error) {
 	return "", ErrUnsupported
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
@ -0,0 +1,20 @@
 // +build cl12
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import "unsafe"
 func (k *Kernel) ArgName(index int) (string, error) {
 	var strC [1024]byte
 	var strN C.size_t
 	if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
 		return "", toError(err)
 	}
 	return string(strC[:strN]), nil
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
@ -0,0 +1,83 @@
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import "unsafe"
 const maxPlatforms = 32
 type Platform struct {
 	id C.cl_platform_id
 }
 // Obtain the list of platforms available.
 func GetPlatforms() ([]*Platform, error) {
 	var platformIds [maxPlatforms]C.cl_platform_id
 	var nPlatforms C.cl_uint
 	if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	platforms := make([]*Platform, nPlatforms)
 	for i := 0; i < int(nPlatforms); i++ {
 		platforms[i] = &Platform{id: platformIds[i]}
 	}
 	return platforms, nil
 }
 func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) {
 	return GetDevices(p, deviceType)
 }
 func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) {
 	var strC [2048]byte
 	var strN C.size_t
 	if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
 		return "", toError(err)
 	}
 	return string(strC[:(strN - 1)]), nil
 }
 func (p *Platform) Name() string {
 	if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil {
 		panic("Platform.Name() should never fail")
 	} else {
 		return str
 	}
 }
 func (p *Platform) Vendor() string {
 	if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil {
 		panic("Platform.Vendor() should never fail")
 	} else {
 		return str
 	}
 }
 func (p *Platform) Profile() string {
 	if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil {
 		panic("Platform.Profile() should never fail")
 	} else {
 		return str
 	}
 }
 func (p *Platform) Version() string {
 	if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil {
 		panic("Platform.Version() should never fail")
 	} else {
 		return str
 	}
 }
 func (p *Platform) Extensions() string {
 	if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil {
 		panic("Platform.Extensions() should never fail")
 	} else {
 		return str
 	}
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
@ -0,0 +1,105 @@
 package cl
 // #include <stdlib.h>
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import (
 	"fmt"
 	"runtime"
 	"unsafe"
 )
 type BuildError struct {
 	Message string
 	Device  *Device
 }
 func (e BuildError) Error() string {
 	if e.Device != nil {
 		return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message)
 	} else {
 		return fmt.Sprintf("cl: build error: %s", e.Message)
 	}
 }
 type Program struct {
 	clProgram C.cl_program
 	devices   []*Device
 }
 func releaseProgram(p *Program) {
 	if p.clProgram != nil {
 		C.clReleaseProgram(p.clProgram)
 		p.clProgram = nil
 	}
 }
 func (p *Program) Release() {
 	releaseProgram(p)
 }
 func (p *Program) BuildProgram(devices []*Device, options string) error {
 	var cOptions *C.char
 	if options != "" {
 		cOptions = C.CString(options)
 		defer C.free(unsafe.Pointer(cOptions))
 	}
 	var deviceList []C.cl_device_id
 	var deviceListPtr *C.cl_device_id
 	numDevices := C.cl_uint(len(devices))
 	if devices != nil && len(devices) > 0 {
 		deviceList = buildDeviceIdList(devices)
 		deviceListPtr = &deviceList[0]
 	}
 	if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS {
 		buffer := make([]byte, 4096)
 		var bLen C.size_t
 		var err C.cl_int
 		for _, dev := range p.devices {
 			for i := 2; i >= 0; i-- {
 				err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen)
 				if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 {
 					// INVALID_VALUE probably means our buffer isn't large enough
 					buffer = make([]byte, bLen)
 				} else {
 					break
 				}
 			}
 			if err != C.CL_SUCCESS {
 				return toError(err)
 			}
 			if bLen > 1 {
 				return BuildError{
 					Device:  dev,
 					Message: string(buffer[:bLen-1]),
 				}
 			}
 		}
 		return BuildError{
 			Device:  nil,
 			Message: "build failed and produced no log entries",
 		}
 	}
 	return nil
 }
 func (p *Program) CreateKernel(name string) (*Kernel, error) {
 	cName := C.CString(name)
 	defer C.free(unsafe.Pointer(cName))
 	var err C.cl_int
 	clKernel := C.clCreateKernel(p.clProgram, cName, &err)
 	if err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	kernel := &Kernel{clKernel: clKernel, name: name}
 	runtime.SetFinalizer(kernel, releaseKernel)
 	return kernel, nil
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
@ -0,0 +1,193 @@
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import "unsafe"
 type CommandQueueProperty int
 const (
 	CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
 	CommandQueueProfilingEnable          CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE
 )
 type CommandQueue struct {
 	clQueue C.cl_command_queue
 	device  *Device
 }
 func releaseCommandQueue(q *CommandQueue) {
 	if q.clQueue != nil {
 		C.clReleaseCommandQueue(q.clQueue)
 		q.clQueue = nil
 	}
 }
 // Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
 func (q *CommandQueue) Release() {
 	releaseCommandQueue(q)
 }
 // Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
 func (q *CommandQueue) Finish() error {
 	return toError(C.clFinish(q.clQueue))
 }
 // Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
 func (q *CommandQueue) Flush() error {
 	return toError(C.clFlush(q.clQueue))
 }
 // Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
 func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
 	var event C.cl_event
 	var err C.cl_int
 	ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
 	if err != C.CL_SUCCESS {
 		return nil, nil, toError(err)
 	}
 	ev := newEvent(event)
 	if ptr == nil {
 		return nil, ev, ErrUnknown
 	}
 	return &MappedMemObject{ptr: ptr, size: size}, ev, nil
 }
 // Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
 func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
 	cOrigin := sizeT3(origin)
 	cRegion := sizeT3(region)
 	var event C.cl_event
 	var err C.cl_int
 	var rowPitch, slicePitch C.size_t
 	ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
 	if err != C.CL_SUCCESS {
 		return nil, nil, toError(err)
 	}
 	ev := newEvent(event)
 	if ptr == nil {
 		return nil, ev, ErrUnknown
 	}
 	size := 0 // TODO: could calculate this
 	return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil
 }
 // Enqueues a command to unmap a previously mapped region of a memory object.
 func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS {
 		return nil, toError(err)
 	}
 	return newEvent(event), nil
 }
 // Enqueues a command to copy a buffer object to another buffer object.
 func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 // Enqueue commands to write to a buffer object from host memory.
 func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
 	dataPtr := unsafe.Pointer(&data[0])
 	dataSize := int(unsafe.Sizeof(data[0])) * len(data)
 	return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
 }
 // Enqueue commands to read from a buffer object to host memory.
 func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
 	dataPtr := unsafe.Pointer(&data[0])
 	dataSize := int(unsafe.Sizeof(data[0])) * len(data)
 	return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
 }
 // Enqueues a command to execute a kernel on a device.
 func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) {
 	workDim := len(globalWorkSize)
 	var globalWorkOffsetList []C.size_t
 	var globalWorkOffsetPtr *C.size_t
 	if globalWorkOffset != nil {
 		globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset))
 		for i, off := range globalWorkOffset {
 			globalWorkOffsetList[i] = C.size_t(off)
 		}
 		globalWorkOffsetPtr = &globalWorkOffsetList[0]
 	}
 	var globalWorkSizeList []C.size_t
 	var globalWorkSizePtr *C.size_t
 	if globalWorkSize != nil {
 		globalWorkSizeList = make([]C.size_t, len(globalWorkSize))
 		for i, off := range globalWorkSize {
 			globalWorkSizeList[i] = C.size_t(off)
 		}
 		globalWorkSizePtr = &globalWorkSizeList[0]
 	}
 	var localWorkSizeList []C.size_t
 	var localWorkSizePtr *C.size_t
 	if localWorkSize != nil {
 		localWorkSizeList = make([]C.size_t, len(localWorkSize))
 		for i, off := range localWorkSize {
 			localWorkSizeList[i] = C.size_t(off)
 		}
 		localWorkSizePtr = &localWorkSizeList[0]
 	}
 	var event C.cl_event
 	err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 // Enqueues a command to read from a 2D or 3D image object to host memory.
 func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
 	cOrigin := sizeT3(origin)
 	cRegion := sizeT3(region)
 	var event C.cl_event
 	err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 // Enqueues a command to write from a 2D or 3D image object to host memory.
 func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
 	cOrigin := sizeT3(origin)
 	cRegion := sizeT3(region)
 	var event C.cl_event
 	err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 // A synchronization point that enqueues a barrier operation.
 func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
 // Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
 func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) {
 	var event C.cl_event
 	err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
 	return newEvent(event), err
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
@ -0,0 +1,487 @@
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 import (
 	"errors"
 	"fmt"
 	"reflect"
 	"runtime"
 	"strings"
 	"unsafe"
 )
 var (
 	ErrUnknown = errors.New("cl: unknown error") // Generally an unexpected result from an OpenCL function (e.g. CL_SUCCESS but null pointer)
 )
 type ErrOther int
 func (e ErrOther) Error() string {
 	return fmt.Sprintf("cl: error %d", int(e))
 }
 var (
 	ErrDeviceNotFound                     = errors.New("cl: Device Not Found")
 	ErrDeviceNotAvailable                 = errors.New("cl: Device Not Available")
 	ErrCompilerNotAvailable               = errors.New("cl: Compiler Not Available")
 	ErrMemObjectAllocationFailure         = errors.New("cl: Mem Object Allocation Failure")
 	ErrOutOfResources                     = errors.New("cl: Out Of Resources")
 	ErrOutOfHostMemory                    = errors.New("cl: Out Of Host Memory")
 	ErrProfilingInfoNotAvailable          = errors.New("cl: Profiling Info Not Available")
 	ErrMemCopyOverlap                     = errors.New("cl: Mem Copy Overlap")
 	ErrImageFormatMismatch                = errors.New("cl: Image Format Mismatch")
 	ErrImageFormatNotSupported            = errors.New("cl: Image Format Not Supported")
 	ErrBuildProgramFailure                = errors.New("cl: Build Program Failure")
 	ErrMapFailure                         = errors.New("cl: Map Failure")
 	ErrMisalignedSubBufferOffset          = errors.New("cl: Misaligned Sub Buffer Offset")
 	ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List")
 	ErrCompileProgramFailure              = errors.New("cl: Compile Program Failure")
 	ErrLinkerNotAvailable                 = errors.New("cl: Linker Not Available")
 	ErrLinkProgramFailure                 = errors.New("cl: Link Program Failure")
 	ErrDevicePartitionFailed              = errors.New("cl: Device Partition Failed")
 	ErrKernelArgInfoNotAvailable          = errors.New("cl: Kernel Arg Info Not Available")
 	ErrInvalidValue                       = errors.New("cl: Invalid Value")
 	ErrInvalidDeviceType                  = errors.New("cl: Invalid Device Type")
 	ErrInvalidPlatform                    = errors.New("cl: Invalid Platform")
 	ErrInvalidDevice                      = errors.New("cl: Invalid Device")
 	ErrInvalidContext                     = errors.New("cl: Invalid Context")
 	ErrInvalidQueueProperties             = errors.New("cl: Invalid Queue Properties")
 	ErrInvalidCommandQueue                = errors.New("cl: Invalid Command Queue")
 	ErrInvalidHostPtr                     = errors.New("cl: Invalid Host Ptr")
 	ErrInvalidMemObject                   = errors.New("cl: Invalid Mem Object")
 	ErrInvalidImageFormatDescriptor       = errors.New("cl: Invalid Image Format Descriptor")
 	ErrInvalidImageSize                   = errors.New("cl: Invalid Image Size")
 	ErrInvalidSampler                     = errors.New("cl: Invalid Sampler")
 	ErrInvalidBinary                      = errors.New("cl: Invalid Binary")
 	ErrInvalidBuildOptions                = errors.New("cl: Invalid Build Options")
 	ErrInvalidProgram                     = errors.New("cl: Invalid Program")
 	ErrInvalidProgramExecutable           = errors.New("cl: Invalid Program Executable")
 	ErrInvalidKernelName                  = errors.New("cl: Invalid Kernel Name")
 	ErrInvalidKernelDefinition            = errors.New("cl: Invalid Kernel Definition")
 	ErrInvalidKernel                      = errors.New("cl: Invalid Kernel")
 	ErrInvalidArgIndex                    = errors.New("cl: Invalid Arg Index")
 	ErrInvalidArgValue                    = errors.New("cl: Invalid Arg Value")
 	ErrInvalidArgSize                     = errors.New("cl: Invalid Arg Size")
 	ErrInvalidKernelArgs                  = errors.New("cl: Invalid Kernel Args")
 	ErrInvalidWorkDimension               = errors.New("cl: Invalid Work Dimension")
 	ErrInvalidWorkGroupSize               = errors.New("cl: Invalid Work Group Size")
 	ErrInvalidWorkItemSize                = errors.New("cl: Invalid Work Item Size")
 	ErrInvalidGlobalOffset                = errors.New("cl: Invalid Global Offset")
 	ErrInvalidEventWaitList               = errors.New("cl: Invalid Event Wait List")
 	ErrInvalidEvent                       = errors.New("cl: Invalid Event")
 	ErrInvalidOperation                   = errors.New("cl: Invalid Operation")
 	ErrInvalidGlObject                    = errors.New("cl: Invalid Gl Object")
 	ErrInvalidBufferSize                  = errors.New("cl: Invalid Buffer Size")
 	ErrInvalidMipLevel                    = errors.New("cl: Invalid Mip Level")
 	ErrInvalidGlobalWorkSize              = errors.New("cl: Invalid Global Work Size")
 	ErrInvalidProperty                    = errors.New("cl: Invalid Property")
 	ErrInvalidImageDescriptor             = errors.New("cl: Invalid Image Descriptor")
 	ErrInvalidCompilerOptions             = errors.New("cl: Invalid Compiler Options")
 	ErrInvalidLinkerOptions               = errors.New("cl: Invalid Linker Options")
 	ErrInvalidDevicePartitionCount        = errors.New("cl: Invalid Device Partition Count")
 )
 var errorMap = map[C.cl_int]error{
 	C.CL_SUCCESS:                                   nil,
 	C.CL_DEVICE_NOT_FOUND:                          ErrDeviceNotFound,
 	C.CL_DEVICE_NOT_AVAILABLE:                      ErrDeviceNotAvailable,
 	C.CL_COMPILER_NOT_AVAILABLE:                    ErrCompilerNotAvailable,
 	C.CL_MEM_OBJECT_ALLOCATION_FAILURE:             ErrMemObjectAllocationFailure,
 	C.CL_OUT_OF_RESOURCES:                          ErrOutOfResources,
 	C.CL_OUT_OF_HOST_MEMORY:                        ErrOutOfHostMemory,
 	C.CL_PROFILING_INFO_NOT_AVAILABLE:              ErrProfilingInfoNotAvailable,
 	C.CL_MEM_COPY_OVERLAP:                          ErrMemCopyOverlap,
 	C.CL_IMAGE_FORMAT_MISMATCH:                     ErrImageFormatMismatch,
 	C.CL_IMAGE_FORMAT_NOT_SUPPORTED:                ErrImageFormatNotSupported,
 	C.CL_BUILD_PROGRAM_FAILURE:                     ErrBuildProgramFailure,
 	C.CL_MAP_FAILURE:                               ErrMapFailure,
 	C.CL_MISALIGNED_SUB_BUFFER_OFFSET:              ErrMisalignedSubBufferOffset,
 	C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList,
 	C.CL_INVALID_VALUE:                             ErrInvalidValue,
 	C.CL_INVALID_DEVICE_TYPE:                       ErrInvalidDeviceType,
 	C.CL_INVALID_PLATFORM:                          ErrInvalidPlatform,
 	C.CL_INVALID_DEVICE:                            ErrInvalidDevice,
 	C.CL_INVALID_CONTEXT:                           ErrInvalidContext,
 	C.CL_INVALID_QUEUE_PROPERTIES:                  ErrInvalidQueueProperties,
 	C.CL_INVALID_COMMAND_QUEUE:                     ErrInvalidCommandQueue,
 	C.CL_INVALID_HOST_PTR:                          ErrInvalidHostPtr,
 	C.CL_INVALID_MEM_OBJECT:                        ErrInvalidMemObject,
 	C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:           ErrInvalidImageFormatDescriptor,
 	C.CL_INVALID_IMAGE_SIZE:                        ErrInvalidImageSize,
 	C.CL_INVALID_SAMPLER:                           ErrInvalidSampler,
 	C.CL_INVALID_BINARY:                            ErrInvalidBinary,
 	C.CL_INVALID_BUILD_OPTIONS:                     ErrInvalidBuildOptions,
 	C.CL_INVALID_PROGRAM:                           ErrInvalidProgram,
 	C.CL_INVALID_PROGRAM_EXECUTABLE:                ErrInvalidProgramExecutable,
 	C.CL_INVALID_KERNEL_NAME:                       ErrInvalidKernelName,
 	C.CL_INVALID_KERNEL_DEFINITION:                 ErrInvalidKernelDefinition,
 	C.CL_INVALID_KERNEL:                            ErrInvalidKernel,
 	C.CL_INVALID_ARG_INDEX:                         ErrInvalidArgIndex,
 	C.CL_INVALID_ARG_VALUE:                         ErrInvalidArgValue,
 	C.CL_INVALID_ARG_SIZE:                          ErrInvalidArgSize,
 	C.CL_INVALID_KERNEL_ARGS:                       ErrInvalidKernelArgs,
 	C.CL_INVALID_WORK_DIMENSION:                    ErrInvalidWorkDimension,
 	C.CL_INVALID_WORK_GROUP_SIZE:                   ErrInvalidWorkGroupSize,
 	C.CL_INVALID_WORK_ITEM_SIZE:                    ErrInvalidWorkItemSize,
 	C.CL_INVALID_GLOBAL_OFFSET:                     ErrInvalidGlobalOffset,
 	C.CL_INVALID_EVENT_WAIT_LIST:                   ErrInvalidEventWaitList,
 	C.CL_INVALID_EVENT:                             ErrInvalidEvent,
 	C.CL_INVALID_OPERATION:                         ErrInvalidOperation,
 	C.CL_INVALID_GL_OBJECT:                         ErrInvalidGlObject,
 	C.CL_INVALID_BUFFER_SIZE:                       ErrInvalidBufferSize,
 	C.CL_INVALID_MIP_LEVEL:                         ErrInvalidMipLevel,
 	C.CL_INVALID_GLOBAL_WORK_SIZE:                  ErrInvalidGlobalWorkSize,
 	C.CL_INVALID_PROPERTY:                          ErrInvalidProperty,
 }
 func toError(code C.cl_int) error {
 	if err, ok := errorMap[code]; ok {
 		return err
 	}
 	return ErrOther(code)
 }
 type LocalMemType int
 const (
 	LocalMemTypeNone   LocalMemType = C.CL_NONE
 	LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL
 	LocalMemTypeLocal  LocalMemType = C.CL_LOCAL
 )
 var localMemTypeMap = map[LocalMemType]string{
 	LocalMemTypeNone:   "None",
 	LocalMemTypeGlobal: "Global",
 	LocalMemTypeLocal:  "Local",
 }
 func (t LocalMemType) String() string {
 	name := localMemTypeMap[t]
 	if name == "" {
 		name = "Unknown"
 	}
 	return name
 }
 type ExecCapability int
 const (
 	ExecCapabilityKernel       ExecCapability = C.CL_EXEC_KERNEL        // The OpenCL device can execute OpenCL kernels.
 	ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
 )
 func (ec ExecCapability) String() string {
 	var parts []string
 	if ec&ExecCapabilityKernel != 0 {
 		parts = append(parts, "Kernel")
 	}
 	if ec&ExecCapabilityNativeKernel != 0 {
 		parts = append(parts, "NativeKernel")
 	}
 	if parts == nil {
 		return ""
 	}
 	return strings.Join(parts, "|")
 }
 type MemCacheType int
 const (
 	MemCacheTypeNone           MemCacheType = C.CL_NONE
 	MemCacheTypeReadOnlyCache  MemCacheType = C.CL_READ_ONLY_CACHE
 	MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE
 )
 func (ct MemCacheType) String() string {
 	switch ct {
 	case MemCacheTypeNone:
 		return "None"
 	case MemCacheTypeReadOnlyCache:
 		return "ReadOnly"
 	case MemCacheTypeReadWriteCache:
 		return "ReadWrite"
 	}
 	return fmt.Sprintf("Unknown(%x)", int(ct))
 }
 type MemFlag int
 const (
 	MemReadWrite    MemFlag = C.CL_MEM_READ_WRITE
 	MemWriteOnly    MemFlag = C.CL_MEM_WRITE_ONLY
 	MemReadOnly     MemFlag = C.CL_MEM_READ_ONLY
 	MemUseHostPtr   MemFlag = C.CL_MEM_USE_HOST_PTR
 	MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR
 	MemCopyHostPtr  MemFlag = C.CL_MEM_COPY_HOST_PTR
 	MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY
 	MemReadOnlyHost  MemFlag = C.CL_MEM_HOST_READ_ONLY
 	MemNoAccessHost  MemFlag = C.CL_MEM_HOST_NO_ACCESS
 )
 type MemObjectType int
 const (
 	MemObjectTypeBuffer  MemObjectType = C.CL_MEM_OBJECT_BUFFER
 	MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D
 	MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D
 )
 type MapFlag int
 const (
 	// This flag specifies that the region being mapped in the memory object is being mapped for reading.
 	MapFlagRead                  MapFlag = C.CL_MAP_READ
 	MapFlagWrite                 MapFlag = C.CL_MAP_WRITE
 	MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
 )
 func (mf MapFlag) toCl() C.cl_map_flags {
 	return C.cl_map_flags(mf)
 }
 type ChannelOrder int
 const (
 	ChannelOrderR         ChannelOrder = C.CL_R
 	ChannelOrderA         ChannelOrder = C.CL_A
 	ChannelOrderRG        ChannelOrder = C.CL_RG
 	ChannelOrderRA        ChannelOrder = C.CL_RA
 	ChannelOrderRGB       ChannelOrder = C.CL_RGB
 	ChannelOrderRGBA      ChannelOrder = C.CL_RGBA
 	ChannelOrderBGRA      ChannelOrder = C.CL_BGRA
 	ChannelOrderARGB      ChannelOrder = C.CL_ARGB
 	ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY
 	ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE
 	ChannelOrderRx        ChannelOrder = C.CL_Rx
 	ChannelOrderRGx       ChannelOrder = C.CL_RGx
 	ChannelOrderRGBx      ChannelOrder = C.CL_RGBx
 )
 var channelOrderNameMap = map[ChannelOrder]string{
 	ChannelOrderR:         "R",
 	ChannelOrderA:         "A",
 	ChannelOrderRG:        "RG",
 	ChannelOrderRA:        "RA",
 	ChannelOrderRGB:       "RGB",
 	ChannelOrderRGBA:      "RGBA",
 	ChannelOrderBGRA:      "BGRA",
 	ChannelOrderARGB:      "ARGB",
 	ChannelOrderIntensity: "Intensity",
 	ChannelOrderLuminance: "Luminance",
 	ChannelOrderRx:        "Rx",
 	ChannelOrderRGx:       "RGx",
 	ChannelOrderRGBx:      "RGBx",
 }
 func (co ChannelOrder) String() string {
 	name := channelOrderNameMap[co]
 	if name == "" {
 		name = fmt.Sprintf("Unknown(%x)", int(co))
 	}
 	return name
 }
 type ChannelDataType int
 const (
 	ChannelDataTypeSNormInt8      ChannelDataType = C.CL_SNORM_INT8
 	ChannelDataTypeSNormInt16     ChannelDataType = C.CL_SNORM_INT16
 	ChannelDataTypeUNormInt8      ChannelDataType = C.CL_UNORM_INT8
 	ChannelDataTypeUNormInt16     ChannelDataType = C.CL_UNORM_INT16
 	ChannelDataTypeUNormShort565  ChannelDataType = C.CL_UNORM_SHORT_565
 	ChannelDataTypeUNormShort555  ChannelDataType = C.CL_UNORM_SHORT_555
 	ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010
 	ChannelDataTypeSignedInt8     ChannelDataType = C.CL_SIGNED_INT8
 	ChannelDataTypeSignedInt16    ChannelDataType = C.CL_SIGNED_INT16
 	ChannelDataTypeSignedInt32    ChannelDataType = C.CL_SIGNED_INT32
 	ChannelDataTypeUnsignedInt8   ChannelDataType = C.CL_UNSIGNED_INT8
 	ChannelDataTypeUnsignedInt16  ChannelDataType = C.CL_UNSIGNED_INT16
 	ChannelDataTypeUnsignedInt32  ChannelDataType = C.CL_UNSIGNED_INT32
 	ChannelDataTypeHalfFloat      ChannelDataType = C.CL_HALF_FLOAT
 	ChannelDataTypeFloat          ChannelDataType = C.CL_FLOAT
 )
 var channelDataTypeNameMap = map[ChannelDataType]string{
 	ChannelDataTypeSNormInt8:      "SNormInt8",
 	ChannelDataTypeSNormInt16:     "SNormInt16",
 	ChannelDataTypeUNormInt8:      "UNormInt8",
 	ChannelDataTypeUNormInt16:     "UNormInt16",
 	ChannelDataTypeUNormShort565:  "UNormShort565",
 	ChannelDataTypeUNormShort555:  "UNormShort555",
 	ChannelDataTypeUNormInt101010: "UNormInt101010",
 	ChannelDataTypeSignedInt8:     "SignedInt8",
 	ChannelDataTypeSignedInt16:    "SignedInt16",
 	ChannelDataTypeSignedInt32:    "SignedInt32",
 	ChannelDataTypeUnsignedInt8:   "UnsignedInt8",
 	ChannelDataTypeUnsignedInt16:  "UnsignedInt16",
 	ChannelDataTypeUnsignedInt32:  "UnsignedInt32",
 	ChannelDataTypeHalfFloat:      "HalfFloat",
 	ChannelDataTypeFloat:          "Float",
 }
 func (ct ChannelDataType) String() string {
 	name := channelDataTypeNameMap[ct]
 	if name == "" {
 		name = fmt.Sprintf("Unknown(%x)", int(ct))
 	}
 	return name
 }
 type ImageFormat struct {
 	ChannelOrder    ChannelOrder
 	ChannelDataType ChannelDataType
 }
 func (f ImageFormat) toCl() C.cl_image_format {
 	var format C.cl_image_format
 	format.image_channel_order = C.cl_channel_order(f.ChannelOrder)
 	format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType)
 	return format
 }
 type ProfilingInfo int
 const (
 	// A 64-bit value that describes the current device time counter in
 	// nanoseconds when the command identified by event is enqueued in
 	// a command-queue by the host.
 	ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED
 	// A 64-bit value that describes the current device time counter in
 	// nanoseconds when the command identified by event that has been
 	// enqueued is submitted by the host to the device associated with the command-queue.
 	ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT
 	// A 64-bit value that describes the current device time counter in
 	// nanoseconds when the command identified by event starts execution on the device.
 	ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START
 	// A 64-bit value that describes the current device time counter in
 	// nanoseconds when the command identified by event has finished
 	// execution on the device.
 	ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END
 )
 type CommmandExecStatus int
 const (
 	CommmandExecStatusComplete  CommmandExecStatus = C.CL_COMPLETE
 	CommmandExecStatusRunning   CommmandExecStatus = C.CL_RUNNING
 	CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED
 	CommmandExecStatusQueued    CommmandExecStatus = C.CL_QUEUED
 )
 type Event struct {
 	clEvent C.cl_event
 }
 func releaseEvent(ev *Event) {
 	if ev.clEvent != nil {
 		C.clReleaseEvent(ev.clEvent)
 		ev.clEvent = nil
 	}
 }
 func (e *Event) Release() {
 	releaseEvent(e)
 }
 func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) {
 	var paramValue C.cl_ulong
 	if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(&paramValue), nil); err != C.CL_SUCCESS {
 		return 0, toError(err)
 	}
 	return int64(paramValue), nil
 }
 // Sets the execution status of a user event object.
 //
 // `status` specifies the new execution status to be set and
 // can be CL_COMPLETE or a negative integer value to indicate
 // an error. A negative integer value causes all enqueued commands
 // that wait on this user event to be terminated. clSetUserEventStatus
 // can only be called once to change the execution status of event.
 func (e *Event) SetUserEventStatus(status int) error {
 	return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status)))
 }
 // Waits on the host thread for commands identified by event objects in
 // events to complete. A command is considered complete if its execution
 // status is CL_COMPLETE or a negative value. The events specified in
 // event_list act as synchronization points.
 //
 // If the cl_khr_gl_event extension is enabled, event objects can also be
 // used to reflect the status of an OpenGL sync object. The sync object
 // in turn refers to a fence command executing in an OpenGL command
 // stream. This provides another method of coordinating sharing of buffers
 // and images between OpenGL and OpenCL.
 func WaitForEvents(events []*Event) error {
 	return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events)))
 }
 func newEvent(clEvent C.cl_event) *Event {
 	ev := &Event{clEvent: clEvent}
 	runtime.SetFinalizer(ev, releaseEvent)
 	return ev
 }
 func eventListPtr(el []*Event) *C.cl_event {
 	if el == nil {
 		return nil
 	}
 	elist := make([]C.cl_event, len(el))
 	for i, e := range el {
 		elist[i] = e.clEvent
 	}
 	return (*C.cl_event)(&elist[0])
 }
 func clBool(b bool) C.cl_bool {
 	if b {
 		return C.CL_TRUE
 	}
 	return C.CL_FALSE
 }
 func sizeT3(i3 [3]int) [3]C.size_t {
 	var val [3]C.size_t
 	val[0] = C.size_t(i3[0])
 	val[1] = C.size_t(i3[1])
 	val[2] = C.size_t(i3[2])
 	return val
 }
 type MappedMemObject struct {
 	ptr        unsafe.Pointer
 	size       int
 	rowPitch   int
 	slicePitch int
 }
 func (mb *MappedMemObject) ByteSlice() []byte {
 	var byteSlice []byte
 	sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice))
 	sliceHeader.Cap = mb.size
 	sliceHeader.Len = mb.size
 	sliceHeader.Data = uintptr(mb.ptr)
 	return byteSlice
 }
 func (mb *MappedMemObject) Ptr() unsafe.Pointer {
 	return mb.ptr
 }
 func (mb *MappedMemObject) Size() int {
 	return mb.size
 }
 func (mb *MappedMemObject) RowPitch() int {
 	return mb.rowPitch
 }
 func (mb *MappedMemObject) SlicePitch() int {
 	return mb.slicePitch
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
@ -0,0 +1,71 @@
 // +build cl12
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 const (
 	ChannelDataTypeUNormInt24  ChannelDataType = C.CL_UNORM_INT24
 	ChannelOrderDepth          ChannelOrder    = C.CL_DEPTH
 	ChannelOrderDepthStencil   ChannelOrder    = C.CL_DEPTH_STENCIL
 	MemHostNoAccess            MemFlag         = C.CL_MEM_HOST_NO_ACCESS  // OpenCL 1.2
 	MemHostReadOnly            MemFlag         = C.CL_MEM_HOST_READ_ONLY  // OpenCL 1.2
 	MemHostWriteOnly           MemFlag         = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
 	MemObjectTypeImage1D       MemObjectType   = C.CL_MEM_OBJECT_IMAGE1D
 	MemObjectTypeImage1DArray  MemObjectType   = C.CL_MEM_OBJECT_IMAGE1D_ARRAY
 	MemObjectTypeImage1DBuffer MemObjectType   = C.CL_MEM_OBJECT_IMAGE1D_BUFFER
 	MemObjectTypeImage2DArray  MemObjectType   = C.CL_MEM_OBJECT_IMAGE2D_ARRAY
 	// This flag specifies that the region being mapped in the memory object is being mapped for writing.
 	//
 	// The contents of the region being mapped are to be discarded. This is typically the case when the
 	// region being mapped is overwritten by the host. This flag allows the implementation to no longer
 	// guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
 	// latest bits in the region being mapped which can be a significant performance enhancement.
 	MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
 )
 func init() {
 	errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure
 	errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed
 	errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions
 	errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount
 	errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor
 	errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions
 	errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable
 	errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure
 	errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable
 	channelOrderNameMap[ChannelOrderDepth] = "Depth"
 	channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil"
 	channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24"
 }
 type ImageDescription struct {
 	Type                            MemObjectType
 	Width, Height, Depth            int
 	ArraySize, RowPitch, SlicePitch int
 	NumMipLevels, NumSamples        int
 	Buffer                          *MemObject
 }
 func (d ImageDescription) toCl() C.cl_image_desc {
 	var desc C.cl_image_desc
 	desc.image_type = C.cl_mem_object_type(d.Type)
 	desc.image_width = C.size_t(d.Width)
 	desc.image_height = C.size_t(d.Height)
 	desc.image_depth = C.size_t(d.Depth)
 	desc.image_array_size = C.size_t(d.ArraySize)
 	desc.image_row_pitch = C.size_t(d.RowPitch)
 	desc.image_slice_pitch = C.size_t(d.SlicePitch)
 	desc.num_mip_levels = C.cl_uint(d.NumMipLevels)
 	desc.num_samples = C.cl_uint(d.NumSamples)
 	desc.buffer = nil
 	if d.Buffer != nil {
 		desc.buffer = d.Buffer.clMem
 	}
 	return desc
 }
--- a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
@ -0,0 +1,45 @@
 package cl
 // #ifdef __APPLE__
 // #include "OpenCL/opencl.h"
 // #else
 // #include "cl.h"
 // #endif
 import "C"
 // Extension: cl_APPLE_fixed_alpha_channel_orders
 //
 // These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
 // They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored.  On calls to read_imagef,
 // the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
 // On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
 // currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
 // image formats.
 const (
 	ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
 	ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
 )
 // Extension: cl_APPLE_biased_fixed_point_image_formats
 //
 // This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
 // It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
 // float to this fixed point format is defined as follows:
 //
 //      ushort float_to_sfixed14( float x ){
 //          int i = convert_int_sat_rte( x * 0x1.0p14f );         // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
 //          i = add_sat( i, 0x4000 );                             // apply bias, to convert to [0, 65535) range
 //          return convert_ushort_sat(i);                         // clamp to destination size
 //      }
 //
 // The inverse conversion is the reverse process. The formats are currently only available on the CPU with
 // the CL_RGBA channel layout.
 const (
 	ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
 )
 func init() {
 	channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple"
 	channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple"
 	channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple"
 }
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
@ -30,8 +30,8 @@ import (
 )
 var (
-	minDifficulty = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
+	maxUint256  = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
-	sharedLight   = new(Light)
+	sharedLight = new(Light)
 )
 const (
@ -140,7 +140,7 @@ func (l *Light) Verify(block pow.Block) bool {
 	// the finalizer before the call completes.
 	_ = cache
 	// The actual check.
-	target := new(big.Int).Div(minDifficulty, difficulty)
+	target := new(big.Int).Div(maxUint256, difficulty)
 	return h256ToHash(ret.result).Big().Cmp(target) <= 0
 }
@ -199,7 +199,7 @@ func (d *dag) generate() {
 		if d.dir == "" {
 			d.dir = DefaultDir
 		}
-		glog.V(logger.Info).Infof("Generating DAG for epoch %d (%x)", d.epoch, seedHash)
+		glog.V(logger.Info).Infof("Generating DAG for epoch %d (size %d) (%x)", d.epoch, dagSize, seedHash)
 		// Generate a temporary cache.
 		// TODO: this could share the cache with Light
 		cache := C.ethash_light_new_internal(cacheSize, (*C.ethash_h256_t)(unsafe.Pointer(&seedHash[0])))
@ -220,14 +220,18 @@ func (d *dag) generate() {
 	})
 }
-func freeDAG(h *dag) {
+func freeDAG(d *dag) {
-	C.ethash_full_delete(h.ptr)
+	C.ethash_full_delete(d.ptr)
-	h.ptr = nil
+	d.ptr = nil
 }
 func (d *dag) Ptr() unsafe.Pointer {
 	return unsafe.Pointer(d.ptr.data)
 }
 //export ethashGoCallback
 func ethashGoCallback(percent C.unsigned) C.int {
-	glog.V(logger.Info).Infof("Still generating DAG: %d%%", percent)
+	glog.V(logger.Info).Infof("Generating DAG: %d%%", percent)
 	return 0
 }
@ -273,7 +277,7 @@ func (pow *Full) getDAG(blockNum uint64) (d *dag) {
 	return d
 }
-func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mixDigest []byte) {
+func (pow *Full) Search(block pow.Block, stop <-chan struct{}, index int) (nonce uint64, mixDigest []byte) {
 	dag := pow.getDAG(block.NumberU64())
 	r := rand.New(rand.NewSource(time.Now().UnixNano()))
@ -286,7 +290,7 @@ func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mi
 	nonce = uint64(r.Int63())
 	hash := hashToH256(block.HashNoNonce())
-	target := new(big.Int).Div(minDifficulty, diff)
+	target := new(big.Int).Div(maxUint256, diff)
 	for {
 		select {
 		case <-stop:
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
@ -0,0 +1,629 @@
 // Copyright 2014 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // The go-ethereum library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 // GNU Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 // +build opencl
 package ethash
 //#cgo LDFLAGS: -w
 //#include <stdint.h>
 //#include <string.h>
 //#include "src/libethash/internal.h"
 import "C"
 import (
 	crand "crypto/rand"
 	"encoding/binary"
 	"fmt"
 	"math"
 	"math/big"
 	mrand "math/rand"
 	"strconv"
 	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
 	"unsafe"
 	"github.com/Gustav-Simonsson/go-opencl/cl"
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/pow"
 )
 /*
  This code have two main entry points:
  1. The initCL(...)  function configures one or more OpenCL device
     (for now only GPU) and loads the Ethash DAG onto device memory
  2. The Search(...) function loads a Ethash nonce into device(s) memory and
     executes the Ethash OpenCL kernel.
  Throughout the code, we refer to "host memory" and "device memory".
  For most systems (e.g. regular PC GPU miner) the host memory is RAM and
  device memory is the GPU global memory (e.g. GDDR5).
  References mentioned in code comments:
  1. https://github.com/ethereum/wiki/wiki/Ethash
  2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
  3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
  4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
 */
 type OpenCLDevice struct {
 	deviceId int
 	device   *cl.Device
 	openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
 	openCL12 bool
 	dagBuf        *cl.MemObject // Ethash full DAG in device mem
 	headerBuf     *cl.MemObject // Hash of block-to-mine in device mem
 	searchBuffers []*cl.MemObject
 	searchKernel *cl.Kernel
 	hashKernel   *cl.Kernel
 	queue         *cl.CommandQueue
 	ctx           *cl.Context
 	workGroupSize int
 	nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
 	result    common.Hash
 }
 type OpenCLMiner struct {
 	mu sync.Mutex
 	ethash *Ethash // Ethash full DAG & cache in host mem
 	deviceIds []int
 	devices   []*OpenCLDevice
 	dagSize uint64
 	hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
 }
 type pendingSearch struct {
 	bufIndex   uint32
 	startNonce uint64
 }
 const (
 	SIZEOF_UINT32 = 4
 	// See [1]
 	ethashMixBytesLen = 128
 	ethashAccesses    = 64
 	// See [4]
 	workGroupSize    = 32 // must be multiple of 8
 	maxSearchResults = 63
 	searchBufSize    = 2
 	globalWorkSize   = 1024 * 256
 )
 func NewCL(deviceIds []int) *OpenCLMiner {
 	ids := make([]int, len(deviceIds))
 	copy(ids, deviceIds)
 	return &OpenCLMiner{
 		ethash:    New(),
 		dagSize:   0, // to see if we need to update DAG.
 		deviceIds: ids,
 	}
 }
 func PrintDevices() {
 	fmt.Println("=============================================")
 	fmt.Println("============ OpenCL Device Info =============")
 	fmt.Println("=============================================")
 	var found []*cl.Device
 	platforms, err := cl.GetPlatforms()
 	if err != nil {
 		fmt.Println("Plaform error (check your OpenCL installation): %v", err)
 		return
 	}
 	for i, p := range platforms {
 		fmt.Println("Platform id             ", i)
 		fmt.Println("Platform Name           ", p.Name())
 		fmt.Println("Platform Vendor         ", p.Vendor())
 		fmt.Println("Platform Version        ", p.Version())
 		fmt.Println("Platform Extensions     ", p.Extensions())
 		fmt.Println("Platform Profile        ", p.Profile())
 		fmt.Println("")
 		devices, err := cl.GetDevices(p, cl.DeviceTypeGPU)
 		if err != nil {
 			fmt.Println("Device error (check your GPU drivers) :", err)
 			return
 		}
 		for _, d := range devices {
 			fmt.Println("Device OpenCL id        ", i)
 			fmt.Println("Device id for mining    ", len(found))
 			fmt.Println("Device Name             ", d.Name())
 			fmt.Println("Vendor                  ", d.Vendor())
 			fmt.Println("Version                 ", d.Version())
 			fmt.Println("Driver version          ", d.DriverVersion())
 			fmt.Println("Address bits            ", d.AddressBits())
 			fmt.Println("Max clock freq          ", d.MaxClockFrequency())
 			fmt.Println("Global mem size         ", d.GlobalMemSize())
 			fmt.Println("Max constant buffer size", d.MaxConstantBufferSize())
 			fmt.Println("Max mem alloc size      ", d.MaxMemAllocSize())
 			fmt.Println("Max compute units       ", d.MaxComputeUnits())
 			fmt.Println("Max work group size     ", d.MaxWorkGroupSize())
 			fmt.Println("Max work item sizes     ", d.MaxWorkItemSizes())
 			fmt.Println("=============================================")
 			found = append(found, d)
 		}
 	}
 	if len(found) == 0 {
 		fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)")
 	} else {
 		var idsFormat string
 		for i := 0; i < len(found); i++ {
 			idsFormat += strconv.Itoa(i)
 			if i != len(found)-1 {
 				idsFormat += ","
 			}
 		}
 		fmt.Printf("Found %v devices. Benchmark first GPU:       geth gpubench 0\n", len(found))
 		fmt.Printf("Mine using all GPUs:                        geth --minegpu %v\n", idsFormat)
 	}
 }
 // See [2]. We basically do the same here, but the Go OpenCL bindings
 // are at a slightly higher abtraction level.
 func InitCL(blockNum uint64, c *OpenCLMiner) error {
 	platforms, err := cl.GetPlatforms()
 	if err != nil {
 		return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err)
 	}
 	var devices []*cl.Device
 	for _, p := range platforms {
 		ds, err := cl.GetDevices(p, cl.DeviceTypeGPU)
 		if err != nil {
 			return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err)
 		}
 		for _, d := range ds {
 			devices = append(devices, d)
 		}
 	}
 	pow := New()
 	_ = pow.getDAG(blockNum)     // generates DAG if we don't have it
 	pow.Light.getCache(blockNum) // and cache
 	c.ethash = pow
 	dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum)))
 	c.dagSize = dagSize
 	for _, id := range c.deviceIds {
 		if id > len(devices)-1 {
 			return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo")
 		} else {
 			err := initCLDevice(id, devices[id], c)
 			if err != nil {
 				return err
 			}
 		}
 	}
 	if len(c.devices) == 0 {
 		return fmt.Errorf("No GPU devices found")
 	}
 	return nil
 }
 func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error {
 	devMaxAlloc := uint64(device.MaxMemAllocSize())
 	devGlobalMem := uint64(device.GlobalMemSize())
 	// TODO: more fine grained version logic
 	if device.Version() == "OpenCL 1.0" {
 		fmt.Println("Device OpenCL version not supported: ", device.Version())
 		return fmt.Errorf("opencl version not supported")
 	}
 	var cl11, cl12 bool
 	if device.Version() == "OpenCL 1.1" {
 		cl11 = true
 	}
 	if device.Version() == "OpenCL 1.2" {
 		cl12 = true
 	}
 	// log warnings but carry on; some device drivers report inaccurate values
 	if c.dagSize > devGlobalMem {
 		fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize)
 	}
 	if c.dagSize > devMaxAlloc {
 		fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc)
 		fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n")
 	}
 	fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name())
 	context, err := cl.CreateContext([]*cl.Device{device})
 	if err != nil {
 		return fmt.Errorf("failed creating context:", err)
 	}
 	// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
 	queue, err := context.CreateCommandQueue(device, 0)
 	if err != nil {
 		return fmt.Errorf("command queue err:", err)
 	}
 	// See [4] section 3.2 and [3] "clBuildProgram".
 	// The OpenCL kernel code is compiled at run-time.
 	kvs := make(map[string]string, 4)
 	kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10)
 	kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10)
 	kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10)
 	kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10)
 	kernelCode := replaceWords(kernel, kvs)
 	program, err := context.CreateProgramWithSource([]string{kernelCode})
 	if err != nil {
 		return fmt.Errorf("program err:", err)
 	}
 	/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device.
 	   see AMD OpenCL programming guide section 4.2
 	   export in shell before running:
 	   export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0"
 	   export CPU_MAX_COMPUTE_UNITS=1
 	buildOpts := "-g -cl-opt-disable"
 	*/
 	buildOpts := ""
 	err = program.BuildProgram([]*cl.Device{device}, buildOpts)
 	if err != nil {
 		return fmt.Errorf("program build err:", err)
 	}
 	var searchKernelName, hashKernelName string
 	searchKernelName = "ethash_search"
 	hashKernelName = "ethash_hash"
 	searchKernel, err := program.CreateKernel(searchKernelName)
 	hashKernel, err := program.CreateKernel(hashKernelName)
 	if err != nil {
 		return fmt.Errorf("kernel err:", err)
 	}
 	// TODO: when this DAG size appears, patch the Go bindings
 	// (context.go) to work with uint64 as size_t
 	if c.dagSize > math.MaxInt32 {
 		fmt.Println("DAG too large for allocation.")
 		return fmt.Errorf("DAG too large for alloc")
 	}
 	// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
 	// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
 	dagBuf := *(new(*cl.MemObject))
 	dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize))
 	if err != nil {
 		return fmt.Errorf("allocating dag buf failed: ", err)
 	}
 	// write DAG to device mem
 	dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data)
 	_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil)
 	if err != nil {
 		return fmt.Errorf("writing to dag buf failed: ", err)
 	}
 	searchBuffers := make([]*cl.MemObject, searchBufSize)
 	for i := 0; i < searchBufSize; i++ {
 		searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32)
 		if err != nil {
 			return fmt.Errorf("search buffer err:", err)
 		}
 		searchBuffers[i] = searchBuff
 	}
 	headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32)
 	if err != nil {
 		return fmt.Errorf("header buffer err:", err)
 	}
 	// Unique, random nonces are crucial for mining efficieny.
 	// While we do not need cryptographically secure PRNG for nonces,
 	// we want to have uniform distribution and minimal repetition of nonces.
 	// We could guarantee strict uniqueness of nonces by generating unique ranges,
 	// but a int64 seed from crypto/rand should be good enough.
 	// we then use math/rand for speed and to avoid draining OS entropy pool
 	seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
 	if err != nil {
 		return err
 	}
 	nonceRand := mrand.New(mrand.NewSource(seed.Int64()))
 	deviceStruct := &OpenCLDevice{
 		deviceId: deviceId,
 		device:   device,
 		openCL11: cl11,
 		openCL12: cl12,
 		dagBuf:        dagBuf,
 		headerBuf:     headerBuf,
 		searchBuffers: searchBuffers,
 		searchKernel: searchKernel,
 		hashKernel:   hashKernel,
 		queue: queue,
 		ctx:   context,
 		workGroupSize: workGroupSize,
 		nonceRand: nonceRand,
 	}
 	c.devices = append(c.devices, deviceStruct)
 	return nil
 }
 func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
 	c.mu.Lock()
 	newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64())))
 	if newDagSize > c.dagSize {
 		// TODO: clean up buffers from previous DAG?
 		err := InitCL(block.NumberU64(), c)
 		if err != nil {
 			fmt.Println("OpenCL init error: ", err)
 			return 0, []byte{0}
 		}
 	}
 	defer c.mu.Unlock()
 	// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
 	select {
 	case <-stop:
 		return 0, []byte{0}
 	default:
 	}
 	headerHash := block.HashNoNonce()
 	diff := block.Difficulty()
 	target256 := new(big.Int).Div(maxUint256, diff)
 	target64 := new(big.Int).Rsh(target256, 192).Uint64()
 	var zero uint32 = 0
 	d := c.devices[index]
 	_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil)
 	if err != nil {
 		fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
 		return 0, []byte{0}
 	}
 	for i := 0; i < searchBufSize; i++ {
 		_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil)
 		if err != nil {
 			fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
 			return 0, []byte{0}
 		}
 	}
 	// wait for all search buffers to complete
 	err = d.queue.Finish()
 	if err != nil {
 		fmt.Println("Error in Search clFinish : ", err)
 		return 0, []byte{0}
 	}
 	err = d.searchKernel.SetArg(1, d.headerBuf)
 	if err != nil {
 		fmt.Println("Error in Search clSetKernelArg : ", err)
 		return 0, []byte{0}
 	}
 	err = d.searchKernel.SetArg(2, d.dagBuf)
 	if err != nil {
 		fmt.Println("Error in Search clSetKernelArg : ", err)
 		return 0, []byte{0}
 	}
 	err = d.searchKernel.SetArg(4, target64)
 	if err != nil {
 		fmt.Println("Error in Search clSetKernelArg : ", err)
 		return 0, []byte{0}
 	}
 	err = d.searchKernel.SetArg(5, uint32(math.MaxUint32))
 	if err != nil {
 		fmt.Println("Error in Search clSetKernelArg : ", err)
 		return 0, []byte{0}
 	}
 	// wait on this before returning
 	var preReturnEvent *cl.Event
 	if d.openCL12 {
 		preReturnEvent, err = d.ctx.CreateUserEvent()
 		if err != nil {
 			fmt.Println("Error in Search create CL user event : ", err)
 			return 0, []byte{0}
 		}
 	}
 	pending := make([]pendingSearch, 0, searchBufSize)
 	var p *pendingSearch
 	searchBufIndex := uint32(0)
 	var checkNonce uint64
 	loops := int64(0)
 	prevHashRate := int32(0)
 	start := time.Now().UnixNano()
 	// we grab a single random nonce and sets this as argument to the kernel search function
 	// the device will then add each local threads gid to the nonce, creating a unique nonce
 	// for each device computing unit executing in parallel
 	initNonce := uint64(d.nonceRand.Int63())
 	for nonce := initNonce; ; nonce += uint64(globalWorkSize) {
 		select {
 		case <-stop:
 			/*
 				if d.openCL12 {
 					err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
 					if err != nil {
 						fmt.Println("Error in Search WaitForEvents: ", err)
 					}
 				}
 			*/
 			atomic.AddInt32(&c.hashRate, -prevHashRate)
 			return 0, []byte{0}
 		default:
 		}
 		if (loops % (1 << 7)) == 0 {
 			elapsed := time.Now().UnixNano() - start
 			// TODO: verify if this is correct hash rate calculation
 			hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256)
 			hashrateDiff := int32(hashes) - prevHashRate
 			prevHashRate = int32(hashes)
 			atomic.AddInt32(&c.hashRate, hashrateDiff)
 		}
 		loops++
 		err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex])
 		if err != nil {
 			fmt.Println("Error in Search clSetKernelArg : ", err)
 			return 0, []byte{0}
 		}
 		err = d.searchKernel.SetArg(3, nonce)
 		if err != nil {
 			fmt.Println("Error in Search clSetKernelArg : ", err)
 			return 0, []byte{0}
 		}
 		// execute kernel
 		_, err := d.queue.EnqueueNDRangeKernel(
 			d.searchKernel,
 			[]int{0},
 			[]int{globalWorkSize},
 			[]int{d.workGroupSize},
 			nil)
 		if err != nil {
 			fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err)
 			return 0, []byte{0}
 		}
 		pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce})
 		searchBufIndex = (searchBufIndex + 1) % searchBufSize
 		if len(pending) == searchBufSize {
 			p = &(pending[searchBufIndex])
 			cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true,
 				cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32,
 				nil)
 			if err != nil {
 				fmt.Println("Error in Search clEnqueueMapBuffer: ", err)
 				return 0, []byte{0}
 			}
 			results := cres.ByteSlice()
 			nfound := binary.LittleEndian.Uint32(results)
 			nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults)))
 			// OpenCL returns the offsets from the start nonce
 			for i := uint32(0); i < nfound; i++ {
 				lo := (i + 1) * SIZEOF_UINT32
 				hi := (i + 2) * SIZEOF_UINT32
 				upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi]))
 				checkNonce = p.startNonce + upperNonce
 				if checkNonce != 0 {
 					cn := C.uint64_t(checkNonce)
 					ds := C.uint64_t(c.dagSize)
 					// We verify that the nonce is indeed a solution by
 					// executing the Ethash verification function (on the CPU).
 					ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn)
 					// TODO: return result first
 					if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 {
 						_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
 						if err != nil {
 							fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err)
 						}
 						if d.openCL12 {
 							err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
 							if err != nil {
 								fmt.Println("Error in Search WaitForEvents: ", err)
 							}
 						}
 						return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32))
 					}
 					_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil)
 					if err != nil {
 						fmt.Println("Error in Search cl: EnqueueWriteBuffer", err)
 						return 0, []byte{0}
 					}
 				}
 			}
 			_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
 			if err != nil {
 				fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err)
 				return 0, []byte{0}
 			}
 			pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...)
 		}
 	}
 	if d.openCL12 {
 		err := cl.WaitForEvents([]*cl.Event{preReturnEvent})
 		if err != nil {
 			fmt.Println("Error in Search clWaitForEvents: ", err)
 			return 0, []byte{0}
 		}
 	}
 	return 0, []byte{0}
 }
 func (c *OpenCLMiner) Verify(block pow.Block) bool {
 	return c.ethash.Light.Verify(block)
 }
 func (c *OpenCLMiner) GetHashrate() int64 {
 	return int64(atomic.LoadInt32(&c.hashRate))
 }
 func (c *OpenCLMiner) Turbo(on bool) {
 	// This is GPU mining. Always be turbo.
 }
 func replaceWords(text string, kvs map[string]string) string {
 	for k, v := range kvs {
 		text = strings.Replace(text, k, v, -1)
 	}
 	return text
 }
 func logErr(err error) {
 	if err != nil {
 		fmt.Println("Error in OpenCL call:", err)
 	}
 }
 func argErr(err error) error {
 	return fmt.Errorf("arg err: %v", err)
 }
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
@ -0,0 +1,600 @@
 package ethash
 /*  DO NOT EDIT!!!
    This code is version controlled at
    https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
    If needed change it there first, then copy over here.
 */
 const kernel = `
 // author Tim Hughes <tim@twistedfury.com>
 // Tested on Radeon HD 7850
 // Hashrate: 15940347 hashes/s
 // Bandwidth: 124533 MB/s
 // search kernel should fit in <= 84 VGPRS (3 wavefronts)
 #define THREADS_PER_HASH (128 / 16)
 #define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
 #define FNV_PRIME	0x01000193
 __constant uint2 const Keccak_f1600_RC[24] = {
 	(uint2)(0x00000001, 0x00000000),
 	(uint2)(0x00008082, 0x00000000),
 	(uint2)(0x0000808a, 0x80000000),
 	(uint2)(0x80008000, 0x80000000),
 	(uint2)(0x0000808b, 0x00000000),
 	(uint2)(0x80000001, 0x00000000),
 	(uint2)(0x80008081, 0x80000000),
 	(uint2)(0x00008009, 0x80000000),
 	(uint2)(0x0000008a, 0x00000000),
 	(uint2)(0x00000088, 0x00000000),
 	(uint2)(0x80008009, 0x00000000),
 	(uint2)(0x8000000a, 0x00000000),
 	(uint2)(0x8000808b, 0x00000000),
 	(uint2)(0x0000008b, 0x80000000),
 	(uint2)(0x00008089, 0x80000000),
 	(uint2)(0x00008003, 0x80000000),
 	(uint2)(0x00008002, 0x80000000),
 	(uint2)(0x00000080, 0x80000000),
 	(uint2)(0x0000800a, 0x00000000),
 	(uint2)(0x8000000a, 0x80000000),
 	(uint2)(0x80008081, 0x80000000),
 	(uint2)(0x00008080, 0x80000000),
 	(uint2)(0x80000001, 0x00000000),
 	(uint2)(0x80008008, 0x80000000),
 };
 void keccak_f1600_round(uint2* a, uint r, uint out_size)
 {
   #if !__ENDIAN_LITTLE__
 	for (uint i = 0; i != 25; ++i)
 		a[i] = a[i].yx;
   #endif
 	uint2 b[25];
 	uint2 t;
 	// Theta
 	b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
 	b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
 	b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
 	b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
 	b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
 	t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
 	a[0] ^= t;
 	a[5] ^= t;
 	a[10] ^= t;
 	a[15] ^= t;
 	a[20] ^= t;
 	t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
 	a[1] ^= t;
 	a[6] ^= t;
 	a[11] ^= t;
 	a[16] ^= t;
 	a[21] ^= t;
 	t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
 	a[2] ^= t;
 	a[7] ^= t;
 	a[12] ^= t;
 	a[17] ^= t;
 	a[22] ^= t;
 	t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
 	a[3] ^= t;
 	a[8] ^= t;
 	a[13] ^= t;
 	a[18] ^= t;
 	a[23] ^= t;
 	t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
 	a[4] ^= t;
 	a[9] ^= t;
 	a[14] ^= t;
 	a[19] ^= t;
 	a[24] ^= t;
 	// Rho Pi
 	b[0] = a[0];
 	b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
 	b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
 	b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
 	b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
 	b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
 	b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
 	b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
 	b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
 	b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
 	b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
 	b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
 	b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
 	b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
 	b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
 	b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
 	b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
 	b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
 	b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
 	b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
 	b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
 	b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
 	b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
 	b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
 	b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
 	// Chi
 	a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
 	a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
 	a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
 	a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
 	if (out_size >= 4)
 	{
 		a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
 		a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
 		a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
 		a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
 		a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
 		if (out_size >= 8)
 		{
 			a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
 			a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
 			a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
 			a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
 			a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
 			a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
 			a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
 			a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
 			a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
 			a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
 			a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
 			a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
 			a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
 			a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
 			a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
 			a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
 		}
 	}
 	// Iota
 	a[0] ^= Keccak_f1600_RC[r];
   #if !__ENDIAN_LITTLE__
 	for (uint i = 0; i != 25; ++i)
 		a[i] = a[i].yx;
   #endif
 }
 void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
 {
 	for (uint i = in_size; i != 25; ++i)
 	{
 		a[i] = 0;
 	}
 #if __ENDIAN_LITTLE__
 	a[in_size] ^= 0x0000000000000001;
 	a[24-out_size*2] ^= 0x8000000000000000;
 #else
 	a[in_size] ^= 0x0100000000000000;
 	a[24-out_size*2] ^= 0x0000000000000080;
 #endif
 	// Originally I unrolled the first and last rounds to interface
 	// better with surrounding code, however I haven't done this
 	// without causing the AMD compiler to blow up the VGPR usage.
 	uint r = 0;
 	do
 	{
 		// This dynamic branch stops the AMD compiler unrolling the loop
 		// and additionally saves about 33% of the VGPRs, enough to gain another
 		// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
 		// massage out of the compiler. It doesn't really seem to matter how
 		// much we try and help the compiler save VGPRs because it seems to throw
 		// that information away, hence the implementation of keccak here
 		// doesn't bother.
 		if (isolate)
 		{
 			keccak_f1600_round((uint2*)a, r++, 25);
 		}
 	}
 	while (r < 23);
 	// final round optimised for digest size
 	keccak_f1600_round((uint2*)a, r++, out_size);
 }
 #define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
 #define countof(x) (sizeof(x) / sizeof(x[0]))
 uint fnv(uint x, uint y)
 {
 	return x * FNV_PRIME ^ y;
 }
 uint4 fnv4(uint4 x, uint4 y)
 {
 	return x * FNV_PRIME ^ y;
 }
 uint fnv_reduce(uint4 v)
 {
 	return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
 }
 typedef union
 {
 	ulong ulongs[32 / sizeof(ulong)];
 	uint uints[32 / sizeof(uint)];
 } hash32_t;
 typedef union
 {
 	ulong ulongs[64 / sizeof(ulong)];
 	uint4 uint4s[64 / sizeof(uint4)];
 } hash64_t;
 typedef union
 {
 	uint uints[128 / sizeof(uint)];
 	uint4 uint4s[128 / sizeof(uint4)];
 } hash128_t;
 hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
 {
 	hash64_t init;
 	uint const init_size = countof(init.ulongs);
 	uint const hash_size = countof(header->ulongs);
 	// sha3_512(header .. nonce)
 	ulong state[25];
 	copy(state, header->ulongs, hash_size);
 	state[hash_size] = nonce;
 	keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
 	copy(init.ulongs, state, init_size);
 	return init;
 }
 uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate)
 {
 	uint4 mix = init;
 	// share init0
 	if (thread_id == 0)
 		*share = mix.x;
 	barrier(CLK_LOCAL_MEM_FENCE);
 	uint init0 = *share;
 	uint a = 0;
 	do
 	{
 		bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
 		#pragma unroll
 		for (uint i = 0; i != 4; ++i)
 		{
 			if (update_share)
 			{
 				uint m[4] = { mix.x, mix.y, mix.z, mix.w };
 				*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
 			}
 			barrier(CLK_LOCAL_MEM_FENCE);
 			mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
 		}
 	} while ((a += 4) != (ACCESSES & isolate));
 	return fnv_reduce(mix);
 }
 uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
 {
 	uint4 mix = init;
 	// share init0
 	if (thread_id == 0)
 		*share = mix.x;
 	barrier(CLK_LOCAL_MEM_FENCE);
 	uint init0 = *share;
 	uint a = 0;
 	do
 	{
 		bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
 		#pragma unroll
 		for (uint i = 0; i != 4; ++i)
 		{
 			if (update_share)
 			{
 				uint m[4] = { mix.x, mix.y, mix.z, mix.w };
 				*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
 			}
 			barrier(CLK_LOCAL_MEM_FENCE);
 			mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
 		}
 	}
 	while ((a += 4) != (ACCESSES & isolate));
 	return fnv_reduce(mix);
 }
 hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
 {
 	ulong state[25];
 	hash32_t hash;
 	uint const hash_size = countof(hash.ulongs);
 	uint const init_size = countof(init->ulongs);
 	uint const mix_size = countof(mix->ulongs);
 	// keccak_256(keccak_512(header..nonce) .. mix);
 	copy(state, init->ulongs, init_size);
 	copy(state + init_size, mix->ulongs, mix_size);
 	keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
 	// copy out
 	copy(hash.ulongs, state, hash_size);
 	return hash;
 }
 hash32_t compute_hash_simple(
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong nonce,
 	uint isolate
 	)
 {
 	hash64_t init = init_hash(g_header, nonce, isolate);
 	hash128_t mix;
 	for (uint i = 0; i != countof(mix.uint4s); ++i)
 	{
 		mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
 	}
 	uint mix_val = mix.uints[0];
 	uint init0 = mix.uints[0];
 	uint a = 0;
 	do
 	{
 		uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
 		uint n = (a+1) % countof(mix.uints);
 		#pragma unroll
 		for (uint i = 0; i != countof(mix.uints); ++i)
 		{
 			mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
 			mix_val = i == n ? mix.uints[i] : mix_val;
 		}
 	}
 	while (++a != (ACCESSES & isolate));
 	// reduce to output
 	hash32_t fnv_mix;
 	for (uint i = 0; i != countof(fnv_mix.uints); ++i)
 	{
 		fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
 	}
 	return final_hash(&init, &fnv_mix, isolate);
 }
 typedef union
 {
 	struct
 	{
 		hash64_t init;
 		uint pad; // avoid lds bank conflicts
 	};
 	hash32_t mix;
 } compute_hash_share;
 hash32_t compute_hash(
 	__local compute_hash_share* share,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong nonce,
 	uint isolate
 	)
 {
 	uint const gid = get_global_id(0);
 	// Compute one init hash per work item.
 	hash64_t init = init_hash(g_header, nonce, isolate);
 	// Threads work together in this phase in groups of 8.
 	uint const thread_id = gid % THREADS_PER_HASH;
 	uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
 	hash32_t mix;
 	uint i = 0;
 	do
 	{
 		// share init with other threads
 		if (i == thread_id)
 			share[hash_id].init = init;
 		barrier(CLK_LOCAL_MEM_FENCE);
 		uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
 		barrier(CLK_LOCAL_MEM_FENCE);
 		uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
 		share[hash_id].mix.uints[thread_id] = thread_mix;
 		barrier(CLK_LOCAL_MEM_FENCE);
 		if (i == thread_id)
 			mix = share[hash_id].mix;
 		barrier(CLK_LOCAL_MEM_FENCE);
 	}
 	while (++i != (THREADS_PER_HASH & isolate));
 	return final_hash(&init, &mix, isolate);
 }
 hash32_t compute_hash_chunks(
 	__local compute_hash_share* share,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	__global hash128_t const* g_dag1,
 	__global hash128_t const* g_dag2,
 	__global hash128_t const* g_dag3,
 	ulong nonce,
 	uint isolate
 	)
 {
 	uint const gid = get_global_id(0);
 	// Compute one init hash per work item.
 	hash64_t init = init_hash(g_header, nonce, isolate);
 	// Threads work together in this phase in groups of 8.
 	uint const thread_id = gid % THREADS_PER_HASH;
 	uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
 	hash32_t mix;
 	uint i = 0;
 	do
 	{
 		// share init with other threads
 		if (i == thread_id)
 			share[hash_id].init = init;
 		barrier(CLK_LOCAL_MEM_FENCE);
 		uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
 		barrier(CLK_LOCAL_MEM_FENCE);
 		uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate);
 		share[hash_id].mix.uints[thread_id] = thread_mix;
 		barrier(CLK_LOCAL_MEM_FENCE);
 		if (i == thread_id)
 			mix = share[hash_id].mix;
 		barrier(CLK_LOCAL_MEM_FENCE);
 	}
 	while (++i != (THREADS_PER_HASH & isolate));
 	return final_hash(&init, &mix, isolate);
 }
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_hash_simple(
 	__global hash32_t* g_hashes,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong start_nonce,
 	uint isolate
 	)
 {
 	uint const gid = get_global_id(0);
 	g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
 }
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_search_simple(
 	__global volatile uint* restrict g_output,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong start_nonce,
 	ulong target,
 	uint isolate
 	)
 {
 	uint const gid = get_global_id(0);
 	hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
 	if (hash.ulongs[countof(hash.ulongs)-1] < target)
 	{
 		uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
 		g_output[slot] = gid;
 	}
 }
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_hash(
 	__global hash32_t* g_hashes,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong start_nonce,
 	uint isolate
 	)
 {
 	__local compute_hash_share share[HASHES_PER_LOOP];
 	uint const gid = get_global_id(0);
 	g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
 }
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_search(
 	__global volatile uint* restrict g_output,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong start_nonce,
 	ulong target,
 	uint isolate
 	)
 {
 	__local compute_hash_share share[HASHES_PER_LOOP];
 	uint const gid = get_global_id(0);
 	hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
 	if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
 	{
 		uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
 		g_output[slot] = gid;
 	}
 }
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_hash_chunks(
 	__global hash32_t* g_hashes,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	__global hash128_t const* g_dag1,
 	__global hash128_t const* g_dag2,
 	__global hash128_t const* g_dag3,
 	ulong start_nonce,
 	uint isolate
 	)
 {
 	__local compute_hash_share share[HASHES_PER_LOOP];
 	uint const gid = get_global_id(0);
 	g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate);
 }
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_search_chunks(
 	__global volatile uint* restrict g_output,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	__global hash128_t const* g_dag1,
 	__global hash128_t const* g_dag2,
 	__global hash128_t const* g_dag3,
 	ulong start_nonce,
 	ulong target,
 	uint isolate
 	)
 {
 	__local compute_hash_share share[HASHES_PER_LOOP];
 	uint const gid = get_global_id(0);
 	hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate);
 	if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
 	{
 		uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
 		g_output[slot] = gid;
 	}
 }
 `
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go
@ -92,7 +92,7 @@ func TestEthashConcurrentVerify(t *testing.T) {
 	defer os.RemoveAll(eth.Full.Dir)
 	block := &testBlock{difficulty: big.NewInt(10)}
-	nonce, md := eth.Search(block, nil)
+	nonce, md := eth.Search(block, nil, 0)
 	block.nonce = nonce
 	block.mixDigest = common.BytesToHash(md)
@ -135,7 +135,7 @@ func TestEthashConcurrentSearch(t *testing.T) {
 	// launch n searches concurrently.
 	for i := 0; i < nsearch; i++ {
 		go func() {
-			nonce, md := eth.Search(block, stop)
+			nonce, md := eth.Search(block, stop, 0)
 			select {
 			case found <- searchRes{n: nonce, md: md}:
 			case <-stop:
@ -167,7 +167,7 @@ func TestEthashSearchAcrossEpoch(t *testing.T) {
 	for i := epochLength - 40; i < epochLength+40; i++ {
 		block := &testBlock{number: i, difficulty: big.NewInt(90)}
 		rand.Read(block.hashNoNonce[:])
-		nonce, md := eth.Search(block, nil)
+		nonce, md := eth.Search(block, nil, 0)
 		block.nonce = nonce
 		block.mixDigest = common.BytesToHash(md)
 		if !eth.Verify(block) {
--- a/6
+++ b/6
@ -6,7 +6,7 @@
 GOBIN = build/bin
 geth:
-	build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/geth
+	build/env.sh go install -v $(shell build/flags.sh) ./cmd/geth
 	@echo "Done building."
 	@echo "Run \"$(GOBIN)/geth\" to launch geth."
@ -39,12 +39,12 @@ evm:
 	@echo "Done building."
 	@echo "Run \"$(GOBIN)/evm to start the evm."
 mist:
-	build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/mist
+	build/env.sh go install -v $(shell build/flags.sh) ./cmd/mist
 	@echo "Done building."
 	@echo "Run \"$(GOBIN)/mist --asset_path=cmd/mist/assets\" to launch mist."
 all:
-	build/env.sh go install -v $(shell build/ldflags.sh) ./...
+	build/env.sh go install -v $(shell build/flags.sh) ./...
 test: all
 	build/env.sh go test ./...
--- a/build/ldflags.sh
+++ b/build/ldflags.sh
@ -16,3 +16,7 @@ sep=$(go version | awk '{ if ($3 >= "go1.5" || index($3, "devel")) print "="; el
 if [ -f ".git/HEAD" ]; then
    echo "-ldflags '-X main.gitCommit$sep$(git rev-parse HEAD)'"
 fi
 if [ ! -z "$GO_OPENCL" ]; then
   echo "-tags opencl"
 fi
--- a/cmd/geth/js_test.go
+++ b/cmd/geth/js_test.go
@ -468,8 +468,7 @@ func processTxs(repl *testjethre, t *testing.T, expTxc int) bool {
 		t.Errorf("incorrect number of pending transactions, expected %v, got %v", expTxc, txc)
 		return false
 	}
-
+	err = repl.ethereum.StartMining(runtime.NumCPU(), "")
 	err = repl.ethereum.StartMining(runtime.NumCPU())
 	if err != nil {
 		t.Errorf("unexpected error mining: %v", err)
 		return false
--- a/cmd/geth/main.go
+++ b/cmd/geth/main.go
@ -104,6 +104,22 @@ The makedag command generates an ethash DAG in /tmp/dag.
 This command exists to support the system testing project.
 Regular users do not need to execute it.
 `,
 		},
 		{
 			Action: gpuinfo,
 			Name:   "gpuinfo",
 			Usage:  "gpuinfo",
 			Description: `
 Prints OpenCL device info for all found GPUs.
 `,
 		},
 		{
 			Action: gpubench,
 			Name:   "gpubench",
 			Usage:  "benchmark GPU",
 			Description: `
 Runs quick benchmark on first GPU found.
 `,
 		},
 		{
@ -298,6 +314,7 @@ JavaScript API. See https://github.com/ethereum/go-ethereum/wiki/Javascipt-Conso
 		utils.GasPriceFlag,
 		utils.MinerThreadsFlag,
 		utils.MiningEnabledFlag,
 		utils.MiningGPUFlag,
 		utils.AutoDAGFlag,
 		utils.NATFlag,
 		utils.NatspecEnabledFlag,
@ -586,7 +603,10 @@ func startEth(ctx *cli.Context, eth *eth.Ethereum) {
 		}
 	}
 	if ctx.GlobalBool(utils.MiningEnabledFlag.Name) {
-		if err := eth.StartMining(ctx.GlobalInt(utils.MinerThreadsFlag.Name)); err != nil {
+		err := eth.StartMining(
 			ctx.GlobalInt(utils.MinerThreadsFlag.Name),
 			ctx.GlobalString(utils.MiningGPUFlag.Name))
 		if err != nil {
 			utils.Fatalf("%v", err)
 		}
 	}
@ -740,6 +760,29 @@ func makedag(ctx *cli.Context) {
 	}
 }
 func gpuinfo(ctx *cli.Context) {
 	eth.PrintOpenCLDevices()
 }
 func gpubench(ctx *cli.Context) {
 	args := ctx.Args()
 	wrongArgs := func() {
 		utils.Fatalf(`Usage: geth gpubench <gpu number>`)
 	}
 	switch {
 	case len(args) == 1:
 		n, err := strconv.ParseUint(args[0], 0, 64)
 		if err != nil {
 			wrongArgs()
 		}
 		eth.GPUBench(n)
 	case len(args) == 0:
 		eth.GPUBench(0)
 	default:
 		wrongArgs()
 	}
 }
 func version(c *cli.Context) {
 	fmt.Println(ClientIdentifier)
 	fmt.Println("Version:", Version)
--- a/cmd/utils/flags.go
+++ b/cmd/utils/flags.go
@ -149,6 +149,12 @@ var (
 	}
 	// miner settings
 	// TODO: refactor CPU vs GPU mining flags
 	MiningGPUFlag = cli.StringFlag{
 		Name:  "minegpu",
 		Usage: "Mine with given GPUs. '--minegpu 0,1' will mine with the first two GPUs found.",
 	}
 	MinerThreadsFlag = cli.IntFlag{
 		Name:  "minerthreads",
 		Usage: "Number of miner threads",
--- a/common/natspec/natspec_e2e_test.go
+++ b/common/natspec/natspec_e2e_test.go
@ -306,7 +306,7 @@ func processTxs(repl *testFrontend, t *testing.T, expTxc int) bool {
 		return false
 	}
-	err = repl.ethereum.StartMining(runtime.NumCPU())
+	err = repl.ethereum.StartMining(runtime.NumCPU(), "")
 	if err != nil {
 		t.Errorf("unexpected error mining: %v", err)
 		return false
--- a/core/chain_makers.go
+++ b/core/chain_makers.go
@ -32,7 +32,7 @@ import (
 // It returns true from Verify for any block.
 type FakePow struct{}
-func (f FakePow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) {
+func (f FakePow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
 	return 0, nil
 }
 func (f FakePow) Verify(block pow.Block) bool { return true }
--- a/core/chain_pow_test.go
+++ b/core/chain_pow_test.go
@ -34,7 +34,7 @@ type failPow struct {
 	failing uint64
 }
-func (pow failPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) {
+func (pow failPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
 	return 0, nil
 }
 func (pow failPow) Verify(block pow.Block) bool { return block.NumberU64() != pow.failing }
@ -47,7 +47,7 @@ type delayedPow struct {
 	delay time.Duration
 }
-func (pow delayedPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) {
+func (pow delayedPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
 	return 0, nil
 }
 func (pow delayedPow) Verify(block pow.Block) bool { time.Sleep(pow.delay); return true }
--- a/eth/backend.go
+++ b/eth/backend.go
@ -481,18 +481,6 @@ func (s *Ethereum) ResetWithGenesisBlock(gb *types.Block) {
 	s.chainManager.ResetWithGenesisBlock(gb)
 }
 func (s *Ethereum) StartMining(threads int) error {
 	eb, err := s.Etherbase()
 	if err != nil {
 		err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
 		glog.V(logger.Error).Infoln(err)
 		return err
 	}
 	go s.miner.Start(eb, threads)
 	return nil
 }
 func (s *Ethereum) Etherbase() (eb common.Address, err error) {
 	eb = s.etherbase
 	if (eb == common.Address{}) {
--- a/eth/cpu_mining.go
+++ b/eth/cpu_mining.go
@ -0,0 +1,54 @@
 // Copyright 2014 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // The go-ethereum library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 // GNU Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 // +build !opencl
 package eth
 import (
 	"errors"
 	"fmt"
 	"github.com/ethereum/go-ethereum/logger"
 	"github.com/ethereum/go-ethereum/logger/glog"
 )
 const disabledInfo = "Set GO_OPENCL and re-build to enable."
 func (s *Ethereum) StartMining(threads int, gpus string) error {
 	eb, err := s.Etherbase()
 	if err != nil {
 		err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
 		glog.V(logger.Error).Infoln(err)
 		return err
 	}
 	if gpus != "" {
 		return errors.New("GPU mining disabled. " + disabledInfo)
 	}
 	// CPU mining
 	go s.miner.Start(eb, threads)
 	return nil
 }
 func GPUBench(gpuid uint64) {
 	fmt.Println("GPU mining disabled. " + disabledInfo)
 }
 func PrintOpenCLDevices() {
 	fmt.Println("OpenCL disabled. " + disabledInfo)
 }
--- a/eth/gpu_mining.go
+++ b/eth/gpu_mining.go
@ -0,0 +1,103 @@
 // Copyright 2014 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // The go-ethereum library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 // GNU Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 // +build opencl
 package eth
 import (
 	"fmt"
 	"math/big"
 	"strconv"
 	"strings"
 	"time"
 	"github.com/ethereum/ethash"
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/logger"
 	"github.com/ethereum/go-ethereum/logger/glog"
 	"github.com/ethereum/go-ethereum/miner"
 )
 func (s *Ethereum) StartMining(threads int, gpus string) error {
 	eb, err := s.Etherbase()
 	if err != nil {
 		err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
 		glog.V(logger.Error).Infoln(err)
 		return err
 	}
 	// GPU mining
 	if gpus != "" {
 		var ids []int
 		for _, s := range strings.Split(gpus, ",") {
 			i, err := strconv.Atoi(s)
 			if err != nil {
 				return fmt.Errorf("Invalid GPU id(s): %v", err)
 			}
 			if i < 0 {
 				return fmt.Errorf("Invalid GPU id: %v", i)
 			}
 			ids = append(ids, i)
 		}
 		// TODO: re-creating miner is a bit ugly
 		cl := ethash.NewCL(ids)
 		s.miner = miner.New(s, s.EventMux(), cl)
 		go s.miner.Start(eb, len(ids))
 		return nil
 	}
 	// CPU mining
 	go s.miner.Start(eb, threads)
 	return nil
 }
 func GPUBench(gpuid uint64) {
 	e := ethash.NewCL([]int{int(gpuid)})
 	var h common.Hash
 	bogoHeader := &types.Header{
 		ParentHash: h,
 		Number:     big.NewInt(int64(42)),
 		Difficulty: big.NewInt(int64(999999999999999)),
 	}
 	bogoBlock := types.NewBlock(bogoHeader, nil, nil, nil)
 	err := ethash.InitCL(bogoBlock.NumberU64(), e)
 	if err != nil {
 		fmt.Println("OpenCL init error: ", err)
 		return
 	}
 	stopChan := make(chan struct{})
 	reportHashRate := func() {
 		for {
 			time.Sleep(3 * time.Second)
 			fmt.Printf("hashes/s : %v\n", e.GetHashrate())
 		}
 	}
 	fmt.Printf("Starting benchmark (%v seconds)\n", 60)
 	go reportHashRate()
 	go e.Search(bogoBlock, stopChan, 0)
 	time.Sleep(60 * time.Second)
 	fmt.Println("OK.")
 }
 func PrintOpenCLDevices() {
 	ethash.PrintDevices()
 }
--- a/miner/agent.go
+++ b/miner/agent.go
@ -118,7 +118,7 @@ func (self *CpuAgent) mine(work *Work, stop <-chan struct{}) {
 	glog.V(logger.Debug).Infof("(re)started agent[%d]. mining...\n", self.index)
 	// Mine
-	nonce, mixDigest := self.pow.Search(work.Block, stop)
+	nonce, mixDigest := self.pow.Search(work.Block, stop, self.index)
 	if nonce != 0 {
 		block := work.Block.WithMiningResult(nonce, common.BytesToHash(mixDigest))
 		self.returnCh <- &Result{work, block}
--- a/pow/ezp/pow.go
+++ b/pow/ezp/pow.go
@ -48,7 +48,7 @@ func (pow *EasyPow) Turbo(on bool) {
 	pow.turbo = on
 }
-func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) {
+func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
 	r := rand.New(rand.NewSource(time.Now().UnixNano()))
 	hash := block.HashNoNonce()
 	diff := block.Difficulty()
--- a/pow/pow.go
+++ b/pow/pow.go
@ -17,7 +17,7 @@
 package pow
 type PoW interface {
-	Search(block Block, stop <-chan struct{}) (uint64, []byte)
+	Search(block Block, stop <-chan struct{}, index int) (uint64, []byte)
 	Verify(block Block) bool
 	GetHashrate() int64
 	Turbo(bool)
--- a/rpc/api/miner.go
+++ b/rpc/api/miner.go
@ -100,7 +100,7 @@ func (self *minerApi) StartMiner(req *shared.Request) (interface{}, error) {
 	}
 	self.ethereum.StartAutoDAG()
-	err := self.ethereum.StartMining(args.Threads)
+	err := self.ethereum.StartMining(args.Threads, "")
 	if err == nil {
 		return true, nil
 	}
--- a/xeth/xeth.go
+++ b/xeth/xeth.go
@ -452,7 +452,7 @@ func (self *XEth) ClientVersion() string {
 func (self *XEth) SetMining(shouldmine bool, threads int) bool {
 	ismining := self.backend.IsMining()
 	if shouldmine && !ismining {
-		err := self.backend.StartMining(threads)
+		err := self.backend.StartMining(threads, "")
 		return err == nil
 	}
 	if ismining && !shouldmine {