core/types: optimize bloom filters (#21624)

* core/types: tests for bloom
* core/types: refactored bloom filter for receipts, added tests
  core/types: replaced old bloom implementation
  core/types: change interface of bloom add+test
* core/types: refactor bloom
* core/types: minor tweak on LogsBloom

Co-authored-by: Marius van der Wijden <m.vanderwijden@live.de>
2020-10-06 15:57:00 +02:00 · 2020-10-06 15:57:00 +02:00 · e43d827a19
commit e43d827a19
parent eb87121300
2 changed files with 168 additions and 69 deletions
--- a/core/types/bloom9.go
+++ b/core/types/bloom9.go
@ -17,6 +17,7 @@
 package types

 import (
+	"encoding/binary"
 	"fmt"
 	"math/big"

@ -57,28 +58,36 @@ func (b *Bloom) SetBytes(d []byte) {
 }

 // Add adds d to the filter. Future calls of Test(d) will return true.
-func (b *Bloom) Add(d *big.Int) {
-	bin := new(big.Int).SetBytes(b[:])
-	bin.Or(bin, bloom9(d.Bytes()))
-	b.SetBytes(bin.Bytes())
+func (b *Bloom) Add(d []byte) {
+	b.add(d, make([]byte, 6))
+}
+
+// add is the internal version of Add; it takes a scratch buffer for reuse (which must be at least 6 bytes long).
+func (b *Bloom) add(d []byte, buf []byte) {
+	i1, v1, i2, v2, i3, v3 := bloomValues(d, buf)
+	b[i1] |= v1
+	b[i2] |= v2
+	b[i3] |= v3
 }

 // Big converts b to a big integer.
+// Note: Converting a bloom filter to a big.Int and then calling Bytes
+// does not return the same bytes, since big.Int will trim leading zeroes.
 func (b Bloom) Big() *big.Int {
 	return new(big.Int).SetBytes(b[:])
 }

+// Bytes returns the backing byte slice of the bloom
 func (b Bloom) Bytes() []byte {
 	return b[:]
 }

-func (b Bloom) Test(test *big.Int) bool {
-	return BloomLookup(b, test)
-}
-
-func (b Bloom) TestBytes(test []byte) bool {
-	return b.Test(new(big.Int).SetBytes(test))
-
+// Test checks if the given topic is present in the bloom filter
+func (b Bloom) Test(topic []byte) bool {
+	i1, v1, i2, v2, i3, v3 := bloomValues(topic, make([]byte, 6))
+	return v1 == v1&b[i1] &&
+		v2 == v2&b[i2] &&
+		v3 == v3&b[i3]
 }

 // MarshalText encodes b as a hex string with 0x prefix.
@ -91,46 +100,61 @@ func (b *Bloom) UnmarshalText(input []byte) error {
 	return hexutil.UnmarshalFixedText("Bloom", input, b[:])
 }

+// CreateBloom creates a bloom filter out of the given receipts (and their logs).
 func CreateBloom(receipts Receipts) Bloom {
-	bin := new(big.Int)
+	buf := make([]byte, 6)
+	var bin Bloom
 	for _, receipt := range receipts {
-		bin.Or(bin, LogsBloom(receipt.Logs))
-	}
-
-	return BytesToBloom(bin.Bytes())
-}
-
-func LogsBloom(logs []*Log) *big.Int {
-	bin := new(big.Int)
-	for _, log := range logs {
-		bin.Or(bin, bloom9(log.Address.Bytes()))
-		for _, b := range log.Topics {
-			bin.Or(bin, bloom9(b[:]))
+		for _, log := range receipt.Logs {
+			bin.add(log.Address.Bytes(), buf)
+			for _, b := range log.Topics {
+				bin.add(b[:], buf)
+			}
 		}
 	}
-
 	return bin
 }

-func bloom9(b []byte) *big.Int {
-	b = crypto.Keccak256(b)
-
-	r := new(big.Int)
-
-	for i := 0; i < 6; i += 2 {
-		t := big.NewInt(1)
-		b := (uint(b[i+1]) + (uint(b[i]) << 8)) & 2047
-		r.Or(r, t.Lsh(t, b))
+// LogsBloom returns the bloom bytes for the given logs
+func LogsBloom(logs []*Log) []byte {
+	buf := make([]byte, 6)
+	var bin Bloom
+	for _, log := range logs {
+		bin.add(log.Address.Bytes(), buf)
+		for _, b := range log.Topics {
+			bin.add(b[:], buf)
+		}
 	}
-
-	return r
+	return bin[:]
 }

-var Bloom9 = bloom9
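+// Bloom9 loads data into a Bloom via SetBytes and returns its bytes. NOTE(review): unlike the removed bloom9, data is not hashed here — confirm this is intended.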
+func Bloom9(data []byte) []byte {
+	var b Bloom
+	b.SetBytes(data)
+	return b.Bytes()
+}

+// bloomValues returns the bytes (index-value pairs) to set for the given data
+func bloomValues(data []byte, hashbuf []byte) (uint, byte, uint, byte, uint, byte) {
+	sha := hasherPool.Get().(crypto.KeccakState)
+	sha.Reset()
+	sha.Write(data)
+	sha.Read(hashbuf)
+	hasherPool.Put(sha)
+	// The actual bits to flip
+	v1 := byte(1 << (hashbuf[1] & 0x7))
+	v2 := byte(1 << (hashbuf[3] & 0x7))
+	v3 := byte(1 << (hashbuf[5] & 0x7))
+	// The indices for the bytes to OR in
+	i1 := BloomByteLength - uint((binary.BigEndian.Uint16(hashbuf)&0x7ff)>>3) - 1
+	i2 := BloomByteLength - uint((binary.BigEndian.Uint16(hashbuf[2:])&0x7ff)>>3) - 1
+	i3 := BloomByteLength - uint((binary.BigEndian.Uint16(hashbuf[4:])&0x7ff)>>3) - 1
+
+	return i1, v1, i2, v2, i3, v3
+}
+
+// BloomLookup is a convenience method to check presence in the bloom filter.
 func BloomLookup(bin Bloom, topic bytesBacked) bool {
-	bloom := bin.Big()
-	cmp := bloom9(topic.Bytes())
-
-	return bloom.And(bloom, cmp).Cmp(cmp) == 0
+	return bin.Test(topic.Bytes())
 }
--- a/core/types/bloom9_test.go
+++ b/core/types/bloom9_test.go
@ -17,8 +17,12 @@
 package types

 import (
+	"fmt"
 	"math/big"
 	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
 )

 func TestBloom(t *testing.T) {
@ -35,47 +39,118 @@ func TestBloom(t *testing.T) {

 	var bloom Bloom
 	for _, data := range positive {
-		bloom.Add(new(big.Int).SetBytes([]byte(data)))
+		bloom.Add([]byte(data))
 	}

 	for _, data := range positive {
-		if !bloom.TestBytes([]byte(data)) {
+		if !bloom.Test([]byte(data)) {
 			t.Error("expected", data, "to test true")
 		}
 	}
 	for _, data := range negative {
-		if bloom.TestBytes([]byte(data)) {
+		if bloom.Test([]byte(data)) {
 			t.Error("did not expect", data, "to test true")
 		}
 	}
 }

-/*
-import (
-	"testing"
-
-	"github.com/ethereum/go-ethereum/core/state"
-)
-
-func TestBloom9(t *testing.T) {
-	testCase := []byte("testtest")
-	bin := LogsBloom([]state.Log{
-		{testCase, [][]byte{[]byte("hellohello")}, nil},
-	}).Bytes()
-	res := BloomLookup(bin, testCase)
-
-	if !res {
-		t.Errorf("Bloom lookup failed")
+// TestBloomExtensively does some more thorough tests
+func TestBloomExtensively(t *testing.T) {
+	var exp = common.HexToHash("c8d3ca65cdb4874300a9e39475508f23ed6da09fdbc487f89a2dcf50b09eb263")
+	var b Bloom
+	// Add 100 "random" things
+	for i := 0; i < 100; i++ {
+		data := fmt.Sprintf("xxxxxxxxxx data %d yyyyyyyyyyyyyy", i)
+		b.Add([]byte(data))
+		//b.Add(new(big.Int).SetBytes([]byte(data)))
+	}
+	got := crypto.Keccak256Hash(b.Bytes())
+	if got != exp {
+		t.Errorf("Got %x, exp %x", got, exp)
+	}
+	var b2 Bloom
+	b2.SetBytes(b.Bytes())
+	got2 := crypto.Keccak256Hash(b2.Bytes())
+	if got != got2 {
+		t.Errorf("Got %x, exp %x", got, got2)
 	}
 }

-
-func TestAddress(t *testing.T) {
-	block := &Block{}
-	block.Coinbase = common.Hex2Bytes("22341ae42d6dd7384bc8584e50419ea3ac75b83f")
-	fmt.Printf("%x\n", crypto.Keccak256(block.Coinbase))
-
-	bin := CreateBloom(block)
-	fmt.Printf("bin = %x\n", common.LeftPadBytes(bin, 64))
+func BenchmarkBloom9(b *testing.B) {
+	test := []byte("testestestest")
+	for i := 0; i < b.N; i++ {
+		Bloom9(test)
+	}
+}
+
+func BenchmarkBloom9Lookup(b *testing.B) {
+	toTest := []byte("testtest")
+	bloom := new(Bloom)
+	for i := 0; i < b.N; i++ {
+		bloom.Test(toTest)
+	}
+}
+
+func BenchmarkCreateBloom(b *testing.B) {
+
+	var txs = Transactions{
+		NewContractCreation(1, big.NewInt(1), 1, big.NewInt(1), nil),
+		NewTransaction(2, common.HexToAddress("0x2"), big.NewInt(2), 2, big.NewInt(2), nil),
+	}
+	var rSmall = Receipts{
+		&Receipt{
+			Status:            ReceiptStatusFailed,
+			CumulativeGasUsed: 1,
+			Logs: []*Log{
+				{Address: common.BytesToAddress([]byte{0x11})},
+				{Address: common.BytesToAddress([]byte{0x01, 0x11})},
+			},
+			TxHash:          txs[0].Hash(),
+			ContractAddress: common.BytesToAddress([]byte{0x01, 0x11, 0x11}),
+			GasUsed:         1,
+		},
+		&Receipt{
+			PostState:         common.Hash{2}.Bytes(),
+			CumulativeGasUsed: 3,
+			Logs: []*Log{
+				{Address: common.BytesToAddress([]byte{0x22})},
+				{Address: common.BytesToAddress([]byte{0x02, 0x22})},
+			},
+			TxHash:          txs[1].Hash(),
+			ContractAddress: common.BytesToAddress([]byte{0x02, 0x22, 0x22}),
+			GasUsed:         2,
+		},
+	}
+
+	var rLarge = make(Receipts, 200)
+	// Fill it with 200 receipts x 2 logs
+	for i := 0; i < 200; i += 2 {
+		copy(rLarge[i:], rSmall)
+	}
+	b.Run("small", func(b *testing.B) {
+		b.ReportAllocs()
+		var bl Bloom
+		for i := 0; i < b.N; i++ {
+			bl = CreateBloom(rSmall)
+		}
+		b.StopTimer()
+		var exp = common.HexToHash("c384c56ece49458a427c67b90fefe979ebf7104795be65dc398b280f24104949")
+		got := crypto.Keccak256Hash(bl.Bytes())
+		if got != exp {
+			b.Errorf("Got %x, exp %x", got, exp)
+		}
+	})
+	b.Run("large", func(b *testing.B) {
+		b.ReportAllocs()
+		var bl Bloom
+		for i := 0; i < b.N; i++ {
+			bl = CreateBloom(rLarge)
+		}
+		b.StopTimer()
+		var exp = common.HexToHash("c384c56ece49458a427c67b90fefe979ebf7104795be65dc398b280f24104949")
+		got := crypto.Keccak256Hash(bl.Bytes())
+		if got != exp {
+			b.Errorf("Got %x, exp %x", got, exp)
+		}
+	})
 }
-*/