diff --git a/common/bitutil/bitutil.go b/common/bitutil/bitutil.go new file mode 100644 index 000000000..117616543 --- /dev/null +++ b/common/bitutil/bitutil.go @@ -0,0 +1,188 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Adapted from: https://golang.org/src/crypto/cipher/xor.go + +// Package bitutil implements fast bitwise operations. +package bitutil + +import ( + "runtime" + "unsafe" +) + +const wordSize = int(unsafe.Sizeof(uintptr(0))) +const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" + +// XORBytes xors the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes xor'd. +func XORBytes(dst, a, b []byte) int { + if supportsUnaligned { + return fastXORBytes(dst, a, b) + } + return safeXORBytes(dst, a, b) +} + +// fastXORBytes xors in bulk. It only works on architectures that support +// unaligned read/writes. +func fastXORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] ^ bw[i] + } + } + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] ^ b[i] + } + return n +} + +// safeXORBytes xors one by one. It works on all architectures, independent if +// it supports unaligned read/writes or not. +func safeXORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] ^ b[i] + } + return n +} + +// ANDBytes ands the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes and'd. +func ANDBytes(dst, a, b []byte) int { + if supportsUnaligned { + return fastANDBytes(dst, a, b) + } + return safeANDBytes(dst, a, b) +} + +// fastANDBytes ands in bulk. It only works on architectures that support +// unaligned read/writes. +func fastANDBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] & bw[i] + } + } + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] & b[i] + } + return n +} + +// safeANDBytes ands one by one. It works on all architectures, independent if +// it supports unaligned read/writes or not. +func safeANDBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] & b[i] + } + return n +} + +// ORBytes ors the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes or'd. +func ORBytes(dst, a, b []byte) int { + if supportsUnaligned { + return fastORBytes(dst, a, b) + } + return safeORBytes(dst, a, b) +} + +// fastORBytes ors in bulk. It only works on architectures that support +// unaligned read/writes. +func fastORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] | bw[i] + } + } + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] | b[i] + } + return n +} + +// safeORBytes ors one by one. It works on all architectures, independent if +// it supports unaligned read/writes or not. +func safeORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] | b[i] + } + return n +} + +// TestBytes tests whether any bit is set in the input byte slice. +func TestBytes(p []byte) bool { + if supportsUnaligned { + return fastTestBytes(p) + } + return safeTestBytes(p) +} + +// fastTestBytes tests for set bits in bulk. It only works on architectures that +// support unaligned read/writes. +func fastTestBytes(p []byte) bool { + n := len(p) + w := n / wordSize + if w > 0 { + pw := *(*[]uintptr)(unsafe.Pointer(&p)) + for i := 0; i < w; i++ { + if pw[i] != 0 { + return true + } + } + } + for i := (n - n%wordSize); i < n; i++ { + if p[i] != 0 { + return true + } + } + return false +} + +// safeTestBytes tests for set bits one byte at a time. It works on all +// architectures, independent if it supports unaligned read/writes or not. +func safeTestBytes(p []byte) bool { + for i := 0; i < len(p); i++ { + if p[i] != 0 { + return true + } + } + return false +} diff --git a/common/bitutil/bitutil_test.go b/common/bitutil/bitutil_test.go new file mode 100644 index 000000000..93647031e --- /dev/null +++ b/common/bitutil/bitutil_test.go @@ -0,0 +1,215 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Adapted from: https://golang.org/src/crypto/cipher/xor_test.go + +package bitutil + +import ( + "bytes" + "testing" +) + +// Tests that bitwise XOR works for various alignments. +func TestXOR(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + XORBytes(d1, p, q) + safeXORBytes(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal", d1, d2) + } + } + } + } +} + +// Tests that bitwise AND works for various alignments. +func TestAND(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + ANDBytes(d1, p, q) + safeANDBytes(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal") + } + } + } + } +} + +// Tests that bitwise OR works for various alignments. +func TestOR(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + ORBytes(d1, p, q) + safeORBytes(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal") + } + } + } + } +} + +// Tests that bit testing works for various alignments. +func TestTest(t *testing.T) { + for align := 0; align < 2; align++ { + // Test for bits set in the bulk part + p := make([]byte, 1023)[align:] + p[100] = 1 + + if TestBytes(p) != safeTestBytes(p) { + t.Error("not equal") + } + // Test for bits set in the tail part + q := make([]byte, 1023)[align:] + q[len(q)-1] = 1 + + if TestBytes(q) != safeTestBytes(q) { + t.Error("not equal") + } + } +} + +// Benchmarks the potentially optimized XOR performance. +func BenchmarkFastXOR1KB(b *testing.B) { benchmarkFastXOR(b, 1024) } +func BenchmarkFastXOR2KB(b *testing.B) { benchmarkFastXOR(b, 2048) } +func BenchmarkFastXOR4KB(b *testing.B) { benchmarkFastXOR(b, 4096) } + +func benchmarkFastXOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + XORBytes(p, p, q) + } +} + +// Benchmarks the baseline XOR performance. +func BenchmarkBaseXOR1KB(b *testing.B) { benchmarkBaseXOR(b, 1024) } +func BenchmarkBaseXOR2KB(b *testing.B) { benchmarkBaseXOR(b, 2048) } +func BenchmarkBaseXOR4KB(b *testing.B) { benchmarkBaseXOR(b, 4096) } + +func benchmarkBaseXOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + safeXORBytes(p, p, q) + } +} + +// Benchmarks the potentially optimized AND performance. +func BenchmarkFastAND1KB(b *testing.B) { benchmarkFastAND(b, 1024) } +func BenchmarkFastAND2KB(b *testing.B) { benchmarkFastAND(b, 2048) } +func BenchmarkFastAND4KB(b *testing.B) { benchmarkFastAND(b, 4096) } + +func benchmarkFastAND(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + ANDBytes(p, p, q) + } +} + +// Benchmarks the baseline AND performance. +func BenchmarkBaseAND1KB(b *testing.B) { benchmarkBaseAND(b, 1024) } +func BenchmarkBaseAND2KB(b *testing.B) { benchmarkBaseAND(b, 2048) } +func BenchmarkBaseAND4KB(b *testing.B) { benchmarkBaseAND(b, 4096) } + +func benchmarkBaseAND(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + safeANDBytes(p, p, q) + } +} + +// Benchmarks the potentially optimized OR performance. +func BenchmarkFastOR1KB(b *testing.B) { benchmarkFastOR(b, 1024) } +func BenchmarkFastOR2KB(b *testing.B) { benchmarkFastOR(b, 2048) } +func BenchmarkFastOR4KB(b *testing.B) { benchmarkFastOR(b, 4096) } + +func benchmarkFastOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + ORBytes(p, p, q) + } +} + +// Benchmarks the baseline OR performance. +func BenchmarkBaseOR1KB(b *testing.B) { benchmarkBaseOR(b, 1024) } +func BenchmarkBaseOR2KB(b *testing.B) { benchmarkBaseOR(b, 2048) } +func BenchmarkBaseOR4KB(b *testing.B) { benchmarkBaseOR(b, 4096) } + +func benchmarkBaseOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + safeORBytes(p, p, q) + } +} + +// Benchmarks the potentially optimized bit testing performance. +func BenchmarkFastTest1KB(b *testing.B) { benchmarkFastTest(b, 1024) } +func BenchmarkFastTest2KB(b *testing.B) { benchmarkFastTest(b, 2048) } +func BenchmarkFastTest4KB(b *testing.B) { benchmarkFastTest(b, 4096) } + +func benchmarkFastTest(b *testing.B, size int) { + p := make([]byte, size) + for i := 0; i < b.N; i++ { + TestBytes(p) + } +} + +// Benchmarks the baseline bit testing performance. +func BenchmarkBaseTest1KB(b *testing.B) { benchmarkBaseTest(b, 1024) } +func BenchmarkBaseTest2KB(b *testing.B) { benchmarkBaseTest(b, 2048) } +func BenchmarkBaseTest4KB(b *testing.B) { benchmarkBaseTest(b, 4096) } + +func benchmarkBaseTest(b *testing.B, size int) { + p := make([]byte, size) + for i := 0; i < b.N; i++ { + safeTestBytes(p) + } +} diff --git a/consensus/ethash/algorithm.go b/consensus/ethash/algorithm.go index 7e8fbfc37..365fe5520 100644 --- a/consensus/ethash/algorithm.go +++ b/consensus/ethash/algorithm.go @@ -27,6 +27,7 @@ import ( "unsafe" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/bitutil" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto/sha3" "github.com/ethereum/go-ethereum/log" @@ -142,7 +143,7 @@ func generateCache(dest []uint32, epoch uint64, seed []byte) { dstOff = j * hashBytes xorOff = (binary.LittleEndian.Uint32(cache[dstOff:]) % uint32(rows)) * hashBytes ) - xorBytes(temp, cache[srcOff:srcOff+hashBytes], cache[xorOff:xorOff+hashBytes]) + bitutil.XORBytes(temp, cache[srcOff:srcOff+hashBytes], cache[xorOff:xorOff+hashBytes]) keccak512(cache[dstOff:], temp) atomic.AddUint32(&progress, 1) diff --git a/consensus/ethash/xor.go b/consensus/ethash/xor.go deleted file mode 100644 index 90e232746..000000000 --- a/consensus/ethash/xor.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Source: https://golang.org/src/crypto/cipher/xor.go - -package ethash - -import ( - "runtime" - "unsafe" -) - -const wordSize = int(unsafe.Sizeof(uintptr(0))) -const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" - -// fastXORBytes xors in bulk. It only works on architectures that -// support unaligned read/writes. -func fastXORBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - - w := n / wordSize - if w > 0 { - dw := *(*[]uintptr)(unsafe.Pointer(&dst)) - aw := *(*[]uintptr)(unsafe.Pointer(&a)) - bw := *(*[]uintptr)(unsafe.Pointer(&b)) - for i := 0; i < w; i++ { - dw[i] = aw[i] ^ bw[i] - } - } - - for i := (n - n%wordSize); i < n; i++ { - dst[i] = a[i] ^ b[i] - } - - return n -} - -func safeXORBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - for i := 0; i < n; i++ { - dst[i] = a[i] ^ b[i] - } - return n -} - -// xorBytes xors the bytes in a and b. The destination is assumed to have enough -// space. Returns the number of bytes xor'd. -func xorBytes(dst, a, b []byte) int { - if supportsUnaligned { - return fastXORBytes(dst, a, b) - } - // TODO(hanwen): if (dst, a, b) have common alignment - // we could still try fastXORBytes. It is not clear - // how often this happens, and it's only worth it if - // the block encryption itself is hardware - // accelerated. - return safeXORBytes(dst, a, b) -} - -// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.) -// The arguments are assumed to be of equal length. -func fastXORWords(dst, a, b []byte) { - dw := *(*[]uintptr)(unsafe.Pointer(&dst)) - aw := *(*[]uintptr)(unsafe.Pointer(&a)) - bw := *(*[]uintptr)(unsafe.Pointer(&b)) - n := len(b) / wordSize - for i := 0; i < n; i++ { - dw[i] = aw[i] ^ bw[i] - } -} - -func xorWords(dst, a, b []byte) { - if supportsUnaligned { - fastXORWords(dst, a, b) - } else { - safeXORBytes(dst, a, b) - } -}