From 5bf8769fb081c5f17f23b1b55e508789f0a8e632 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 15 Nov 2023 16:20:34 +0100 Subject: [PATCH] ethdb/memorydb, trie: reduced allocations (#28473) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * trie: use pooling of iterator states in iterator The node iterator burns through a lot of memory while iterating a trie, and a lot of that can be avoided by using a fairly small pool (max 40 items). name old time/op new time/op delta Iterator-8 6.22ms ± 3% 5.40ms ± 6% -13.18% (p=0.008 n=5+5) name old alloc/op new alloc/op delta Iterator-8 2.36MB ± 0% 1.67MB ± 0% -29.23% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Iterator-8 37.0k ± 0% 29.8k ± 0% ~ (p=0.079 n=4+5) * ethdb/memorydb: avoid one copying of key By making the transformation from []byte to string at an earlier point, we save an allocation which otherwise happens later on. name old time/op new time/op delta BatchAllocs-8 412µs ± 6% 382µs ± 2% -7.18% (p=0.016 n=5+4) name old alloc/op new alloc/op delta BatchAllocs-8 480kB ± 0% 490kB ± 0% +1.93% (p=0.008 n=5+5) name old allocs/op new allocs/op delta BatchAllocs-8 3.03k ± 0% 2.03k ± 0% -32.98% (p=0.008 n=5+5) --- ethdb/memorydb/memorydb.go | 14 +++---- ethdb/memorydb/memorydb_test.go | 18 +++++++++ trie/iterator.go | 71 +++++++++++++++++++++------------ trie/iterator_test.go | 12 ++++++ trie/sync_test.go | 2 +- 5 files changed, 83 insertions(+), 34 deletions(-) diff --git a/ethdb/memorydb/memorydb.go b/ethdb/memorydb/memorydb.go index f9f74322b..2a939f9a1 100644 --- a/ethdb/memorydb/memorydb.go +++ b/ethdb/memorydb/memorydb.go @@ -207,7 +207,7 @@ func (db *Database) Len() int { // keyvalue is a key-value tuple tagged with a deletion field to allow creating // memory-database write batches. type keyvalue struct { - key []byte + key string value []byte delete bool } @@ -222,14 +222,14 @@ type batch struct { // Put inserts the given value into the batch for later committing. func (b *batch) Put(key, value []byte) error { - b.writes = append(b.writes, keyvalue{common.CopyBytes(key), common.CopyBytes(value), false}) + b.writes = append(b.writes, keyvalue{string(key), common.CopyBytes(value), false}) b.size += len(key) + len(value) return nil } // Delete inserts the a key removal into the batch for later committing. func (b *batch) Delete(key []byte) error { - b.writes = append(b.writes, keyvalue{common.CopyBytes(key), nil, true}) + b.writes = append(b.writes, keyvalue{string(key), nil, true}) b.size += len(key) return nil } @@ -249,10 +249,10 @@ func (b *batch) Write() error { } for _, keyvalue := range b.writes { if keyvalue.delete { - delete(b.db.db, string(keyvalue.key)) + delete(b.db.db, keyvalue.key) continue } - b.db.db[string(keyvalue.key)] = keyvalue.value + b.db.db[keyvalue.key] = keyvalue.value } return nil } @@ -267,12 +267,12 @@ func (b *batch) Reset() { func (b *batch) Replay(w ethdb.KeyValueWriter) error { for _, keyvalue := range b.writes { if keyvalue.delete { - if err := w.Delete(keyvalue.key); err != nil { + if err := w.Delete([]byte(keyvalue.key)); err != nil { return err } continue } - if err := w.Put(keyvalue.key, keyvalue.value); err != nil { + if err := w.Put([]byte(keyvalue.key), keyvalue.value); err != nil { return err } } diff --git a/ethdb/memorydb/memorydb_test.go b/ethdb/memorydb/memorydb_test.go index dba18ad30..51499c3b1 100644 --- a/ethdb/memorydb/memorydb_test.go +++ b/ethdb/memorydb/memorydb_test.go @@ -17,6 +17,7 @@ package memorydb import ( + "encoding/binary" "testing" "github.com/ethereum/go-ethereum/ethdb" @@ -30,3 +31,20 @@ func TestMemoryDB(t *testing.T) { }) }) } + +// BenchmarkBatchAllocs measures the time/allocs for storing 120 kB of data +func BenchmarkBatchAllocs(b *testing.B) { + b.ReportAllocs() + var key = make([]byte, 20) + var val = make([]byte, 100) + // 120 * 1_000 -> 120_000 == 120kB + for i := 0; i < b.N; i++ { + batch := New().NewBatch() + for j := uint64(0); j < 1000; j++ { + binary.BigEndian.PutUint64(key, j) + binary.BigEndian.PutUint64(val, j) + batch.Put(key, val) + } + batch.Write() + } +} diff --git a/trie/iterator.go b/trie/iterator.go index 6f054a724..83ccc0740 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -144,7 +144,8 @@ type nodeIterator struct { path []byte // Path to the current node err error // Failure set in case of an internal error in the iterator - resolver NodeResolver // optional node resolver for avoiding disk hits + resolver NodeResolver // optional node resolver for avoiding disk hits + pool []*nodeIteratorState // local pool for iteratorstates } // errIteratorEnd is stored in nodeIterator.err when iteration is done. @@ -172,6 +173,24 @@ func newNodeIterator(trie *Trie, start []byte) NodeIterator { return it } +func (it *nodeIterator) putInPool(item *nodeIteratorState) { + if len(it.pool) < 40 { + item.node = nil + it.pool = append(it.pool, item) + } +} + +func (it *nodeIterator) getFromPool() *nodeIteratorState { + idx := len(it.pool) - 1 + if idx < 0 { + return new(nodeIteratorState) + } + el := it.pool[idx] + it.pool[idx] = nil + it.pool = it.pool[:idx] + return el +} + func (it *nodeIterator) AddResolver(resolver NodeResolver) { it.resolver = resolver } @@ -423,8 +442,9 @@ func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { return nil } -func findChild(n *fullNode, index int, path []byte, ancestor common.Hash) (node, *nodeIteratorState, []byte, int) { +func (it *nodeIterator) findChild(n *fullNode, index int, ancestor common.Hash) (node, *nodeIteratorState, []byte, int) { var ( + path = it.path child node state *nodeIteratorState childPath []byte @@ -433,13 +453,12 @@ func findChild(n *fullNode, index int, path []byte, ancestor common.Hash) (node, if n.Children[index] != nil { child = n.Children[index] hash, _ := child.cache() - state = &nodeIteratorState{ - hash: common.BytesToHash(hash), - node: child, - parent: ancestor, - index: -1, - pathlen: len(path), - } + state = it.getFromPool() + state.hash = common.BytesToHash(hash) + state.node = child + state.parent = ancestor + state.index = -1 + state.pathlen = len(path) childPath = append(childPath, path...) childPath = append(childPath, byte(index)) return child, state, childPath, index @@ -452,7 +471,7 @@ func (it *nodeIterator) nextChild(parent *nodeIteratorState, ancestor common.Has switch node := parent.node.(type) { case *fullNode: // Full node, move to the first non-nil child. - if child, state, path, index := findChild(node, parent.index+1, it.path, ancestor); child != nil { + if child, state, path, index := it.findChild(node, parent.index+1, ancestor); child != nil { parent.index = index - 1 return state, path, true } @@ -460,13 +479,12 @@ func (it *nodeIterator) nextChild(parent *nodeIteratorState, ancestor common.Has // Short node, return the pointer singleton child if parent.index < 0 { hash, _ := node.Val.cache() - state := &nodeIteratorState{ - hash: common.BytesToHash(hash), - node: node.Val, - parent: ancestor, - index: -1, - pathlen: len(it.path), - } + state := it.getFromPool() + state.hash = common.BytesToHash(hash) + state.node = node.Val + state.parent = ancestor + state.index = -1 + state.pathlen = len(it.path) path := append(it.path, node.Key...) return state, path, true } @@ -480,7 +498,7 @@ func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.H switch n := parent.node.(type) { case *fullNode: // Full node, move to the first non-nil child before the desired key position - child, state, path, index := findChild(n, parent.index+1, it.path, ancestor) + child, state, path, index := it.findChild(n, parent.index+1, ancestor) if child == nil { // No more children in this fullnode return parent, it.path, false @@ -492,7 +510,7 @@ func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.H } // The child is before the seek position. Try advancing for { - nextChild, nextState, nextPath, nextIndex := findChild(n, index+1, it.path, ancestor) + nextChild, nextState, nextPath, nextIndex := it.findChild(n, index+1, ancestor) // If we run out of children, or skipped past the target, return the // previous one if nextChild == nil || bytes.Compare(nextPath, key) >= 0 { @@ -506,13 +524,12 @@ func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.H // Short node, return the pointer singleton child if parent.index < 0 { hash, _ := n.Val.cache() - state := &nodeIteratorState{ - hash: common.BytesToHash(hash), - node: n.Val, - parent: ancestor, - index: -1, - pathlen: len(it.path), - } + state := it.getFromPool() + state.hash = common.BytesToHash(hash) + state.node = n.Val + state.parent = ancestor + state.index = -1 + state.pathlen = len(it.path) path := append(it.path, n.Key...) return state, path, true } @@ -533,6 +550,8 @@ func (it *nodeIterator) pop() { it.path = it.path[:last.pathlen] it.stack[len(it.stack)-1] = nil it.stack = it.stack[:len(it.stack)-1] + // last is now unused + it.putInPool(last) } func compareNodes(a, b NodeIterator) int { diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 57d1f06a1..9679b49ca 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -616,3 +616,15 @@ func isTrieNode(scheme string, key, val []byte) (bool, []byte, common.Hash) { } return true, path, hash } + +func BenchmarkIterator(b *testing.B) { + diskDb, srcDb, tr, _ := makeTestTrie(rawdb.HashScheme) + root := tr.Hash() + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := checkTrieConsistency(diskDb, srcDb.Scheme(), root, false); err != nil { + b.Fatal(err) + } + } +} diff --git a/trie/sync_test.go b/trie/sync_test.go index 3b7986ef6..7032c6d2f 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -571,7 +571,7 @@ func testIncompleteSync(t *testing.T, scheme string) { hash := crypto.Keccak256Hash(result.Data) if hash != root { addedKeys = append(addedKeys, result.Path) - addedHashes = append(addedHashes, crypto.Keccak256Hash(result.Data)) + addedHashes = append(addedHashes, hash) } } // Fetch the next batch to retrieve