From 4047ccad2fb73fd2cfd69bf5b8cbfa788871ce0f Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 13 Apr 2017 14:41:24 +0200 Subject: [PATCH] trie: add start key to NodeIterator constructors The 'step' method is split into two parts, 'peek' and 'push'. peek returns the next state but doesn't make it current. The end of iteration was previously tracked by setting 'trie' to nil. End of iteration is now tracked using the 'iteratorEnd' error, which is slightly cleaner and requires less code. --- core/state/dump.go | 4 +- core/state/iterator.go | 4 +- core/state/statedb.go | 2 +- trie/iterator.go | 131 ++++++++++++++++++++++++----------------- trie/iterator_test.go | 65 ++++++++++++++++---- trie/secure_trie.go | 6 +- trie/sync_test.go | 2 +- trie/trie.go | 7 ++- trie/trie_test.go | 2 +- 9 files changed, 148 insertions(+), 75 deletions(-) diff --git a/core/state/dump.go b/core/state/dump.go index 6338ddf88..ffa1a7283 100644 --- a/core/state/dump.go +++ b/core/state/dump.go @@ -45,7 +45,7 @@ func (self *StateDB) RawDump() Dump { Accounts: make(map[string]DumpAccount), } - it := trie.NewIterator(self.trie.NodeIterator()) + it := trie.NewIterator(self.trie.NodeIterator(nil)) for it.Next() { addr := self.trie.GetKey(it.Key) var data Account @@ -62,7 +62,7 @@ func (self *StateDB) RawDump() Dump { Code: common.Bytes2Hex(obj.Code(self.db)), Storage: make(map[string]string), } - storageIt := trie.NewIterator(obj.getTrie(self.db).NodeIterator()) + storageIt := trie.NewIterator(obj.getTrie(self.db).NodeIterator(nil)) for storageIt.Next() { account.Storage[common.Bytes2Hex(self.trie.GetKey(storageIt.Key))] = common.Bytes2Hex(storageIt.Value) } diff --git a/core/state/iterator.go b/core/state/iterator.go index d2dd5a74e..a8a2722ae 100644 --- a/core/state/iterator.go +++ b/core/state/iterator.go @@ -75,7 +75,7 @@ func (it *NodeIterator) step() error { } // Initialize the iterator if we've just started if it.stateIt == nil { - it.stateIt = it.state.trie.NodeIterator() + it.stateIt = it.state.trie.NodeIterator(nil) } // If we had data nodes previously, we surely have at least state nodes if it.dataIt != nil { @@ -118,7 +118,7 @@ func (it *NodeIterator) step() error { if err != nil { return err } - it.dataIt = dataTrie.NodeIterator() + it.dataIt = dataTrie.NodeIterator(nil) if !it.dataIt.Next(true) { it.dataIt = nil } diff --git a/core/state/statedb.go b/core/state/statedb.go index 24381ced5..431f33e02 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -481,7 +481,7 @@ func (db *StateDB) ForEachStorage(addr common.Address, cb func(key, value common cb(h, value) } - it := trie.NewIterator(so.getTrie(db.db).NodeIterator()) + it := trie.NewIterator(so.getTrie(db.db).NodeIterator(nil)) for it.Next() { // ignore cached values key := common.BytesToHash(db.trie.GetKey(it.Key)) diff --git a/trie/iterator.go b/trie/iterator.go index fef5b2593..26ae1d5ad 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -19,10 +19,13 @@ package trie import ( "bytes" "container/heap" + "errors" "github.com/ethereum/go-ethereum/common" ) +var iteratorEnd = errors.New("end of iteration") + // Iterator is a key-value trie iterator that traverses a Trie. type Iterator struct { nodeIt NodeIterator @@ -79,25 +82,24 @@ type nodeIteratorState struct { hash common.Hash // Hash of the node being iterated (nil if not standalone) node node // Trie node being iterated parent common.Hash // Hash of the first full ancestor node (nil if current is the root) - child int // Child to be processed next + index int // Child to be processed next pathlen int // Length of the path to this node } type nodeIterator struct { trie *Trie // Trie being iterated stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state - - err error // Failure set in case of an internal error in the iterator - - path []byte // Path to the current node + err error // Failure set in case of an internal error in the iterator + path []byte // Path to the current node } -// newNodeIterator creates an post-order trie iterator. -func newNodeIterator(trie *Trie) NodeIterator { +func newNodeIterator(trie *Trie, start []byte) NodeIterator { if trie.Hash() == emptyState { return new(nodeIterator) } - return &nodeIterator{trie: trie} + it := &nodeIterator{trie: trie} + it.seek(start) + return it } // Hash returns the hash of the current node @@ -147,6 +149,9 @@ func (it *nodeIterator) Path() []byte { // Error returns the error set in case of an internal error in the iterator func (it *nodeIterator) Error() error { + if it.err == iteratorEnd { + return nil + } return it.err } @@ -155,47 +160,54 @@ func (it *nodeIterator) Error() error { // sets the Error field to the encountered failure. If `descend` is false, // skips iterating over any subnodes of the current node. func (it *nodeIterator) Next(descend bool) bool { - // If the iterator failed previously, don't do anything if it.err != nil { return false } // Otherwise step forward with the iterator and report any errors - if err := it.step(descend); err != nil { + state, parentIndex, path, err := it.peek(descend) + if err != nil { it.err = err return false } - return it.trie != nil + it.push(state, parentIndex, path) + return true } -// step moves the iterator to the next node of the trie. -func (it *nodeIterator) step(descend bool) error { - if it.trie == nil { - // Abort if we reached the end of the iteration - return nil +func (it *nodeIterator) seek(prefix []byte) { + // The path we're looking for is the hex encoded key without terminator. + key := keybytesToHex(prefix) + key = key[:len(key)-1] + // Move forward until we're just before the closest match to key. + for { + state, parentIndex, path, err := it.peek(bytes.HasPrefix(key, it.path)) + if err != nil || bytes.Compare(path, key) >= 0 { + it.err = err + return + } + it.push(state, parentIndex, path) } +} + +// peek creates the next state of the iterator. +func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, error) { if len(it.stack) == 0 { // Initialize the iterator if we've just started. root := it.trie.Hash() - state := &nodeIteratorState{node: it.trie.root, child: -1} + state := &nodeIteratorState{node: it.trie.root, index: -1} if root != emptyRoot { state.hash = root } - it.stack = append(it.stack, state) - return nil + return state, nil, nil, nil } - if !descend { // If we're skipping children, pop the current node first - it.path = it.path[:it.stack[len(it.stack)-1].pathlen] - it.stack = it.stack[:len(it.stack)-1] + it.pop() } // Continue iteration to the next child -outer: for { if len(it.stack) == 0 { - it.trie = nil - return nil + return nil, nil, nil, iteratorEnd } parent := it.stack[len(it.stack)-1] ancestor := parent.hash @@ -203,63 +215,76 @@ outer: ancestor = parent.parent } if node, ok := parent.node.(*fullNode); ok { - // Full node, iterate over children - for parent.child++; parent.child < len(node.Children); parent.child++ { - child := node.Children[parent.child] + // Full node, move to the first non-nil child. + for i := parent.index + 1; i < len(node.Children); i++ { + child := node.Children[i] if child != nil { hash, _ := child.cache() - it.stack = append(it.stack, &nodeIteratorState{ + state := &nodeIteratorState{ hash: common.BytesToHash(hash), node: child, parent: ancestor, - child: -1, + index: -1, pathlen: len(it.path), - }) - it.path = append(it.path, byte(parent.child)) - break outer + } + path := append(it.path, byte(i)) + parent.index = i - 1 + return state, &parent.index, path, nil } } } else if node, ok := parent.node.(*shortNode); ok { // Short node, return the pointer singleton child - if parent.child < 0 { - parent.child++ + if parent.index < 0 { hash, _ := node.Val.cache() - it.stack = append(it.stack, &nodeIteratorState{ + state := &nodeIteratorState{ hash: common.BytesToHash(hash), node: node.Val, parent: ancestor, - child: -1, + index: -1, pathlen: len(it.path), - }) - if hasTerm(node.Key) { - it.path = append(it.path, node.Key[:len(node.Key)-1]...) - } else { - it.path = append(it.path, node.Key...) } - break + var path []byte + if hasTerm(node.Key) { + path = append(it.path, node.Key[:len(node.Key)-1]...) + } else { + path = append(it.path, node.Key...) + } + return state, &parent.index, path, nil } } else if hash, ok := parent.node.(hashNode); ok { // Hash node, resolve the hash child from the database - if parent.child < 0 { - parent.child++ + if parent.index < 0 { node, err := it.trie.resolveHash(hash, nil, nil) if err != nil { - return err + return it.stack[len(it.stack)-1], &parent.index, it.path, err } - it.stack = append(it.stack, &nodeIteratorState{ + state := &nodeIteratorState{ hash: common.BytesToHash(hash), node: node, parent: ancestor, - child: -1, + index: -1, pathlen: len(it.path), - }) - break + } + return state, &parent.index, it.path, nil } } - it.path = it.path[:parent.pathlen] - it.stack = it.stack[:len(it.stack)-1] + // No more child nodes, move back up. + it.pop() } - return nil +} + +func (it *nodeIterator) push(state *nodeIteratorState, parentIndex *int, path []byte) { + it.path = path + it.stack = append(it.stack, state) + if parentIndex != nil { + *parentIndex += 1 + } +} + +func (it *nodeIterator) pop() { + parent := it.stack[len(it.stack)-1] + it.path = it.path[:parent.pathlen] + it.stack = it.stack[:len(it.stack)-1] } func compareNodes(a, b NodeIterator) int { diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 04d51aaf5..f161fd99d 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -17,6 +17,8 @@ package trie import ( + "bytes" + "fmt" "testing" "github.com/ethereum/go-ethereum/common" @@ -42,7 +44,7 @@ func TestIterator(t *testing.T) { trie.Commit() found := make(map[string]string) - it := NewIterator(trie.NodeIterator()) + it := NewIterator(trie.NodeIterator(nil)) for it.Next() { found[string(it.Key)] = string(it.Value) } @@ -72,7 +74,7 @@ func TestIteratorLargeData(t *testing.T) { vals[string(value2.k)] = value2 } - it := NewIterator(trie.NodeIterator()) + it := NewIterator(trie.NodeIterator(nil)) for it.Next() { vals[string(it.Key)].t = true } @@ -99,7 +101,7 @@ func TestNodeIteratorCoverage(t *testing.T) { // Gather all the node hashes found by the iterator hashes := make(map[common.Hash]struct{}) - for it := trie.NodeIterator(); it.Next(true); { + for it := trie.NodeIterator(nil); it.Next(true); { if it.Hash() != (common.Hash{}) { hashes[it.Hash()] = struct{}{} } @@ -117,18 +119,20 @@ func TestNodeIteratorCoverage(t *testing.T) { } } -var testdata1 = []struct{ k, v string }{ - {"bar", "b"}, +type kvs struct{ k, v string } + +var testdata1 = []kvs{ {"barb", "ba"}, - {"bars", "bb"}, {"bard", "bc"}, + {"bars", "bb"}, + {"bar", "b"}, {"fab", "z"}, - {"foo", "a"}, {"food", "ab"}, {"foos", "aa"}, + {"foo", "a"}, } -var testdata2 = []struct{ k, v string }{ +var testdata2 = []kvs{ {"aardvark", "c"}, {"bar", "b"}, {"barb", "bd"}, @@ -140,6 +144,47 @@ var testdata2 = []struct{ k, v string }{ {"jars", "d"}, } +func TestIteratorSeek(t *testing.T) { + trie := newEmpty() + for _, val := range testdata1 { + trie.Update([]byte(val.k), []byte(val.v)) + } + + // Seek to the middle. + it := NewIterator(trie.NodeIterator([]byte("fab"))) + if err := checkIteratorOrder(testdata1[4:], it); err != nil { + t.Fatal(err) + } + + // Seek to a non-existent key. + it = NewIterator(trie.NodeIterator([]byte("barc"))) + if err := checkIteratorOrder(testdata1[1:], it); err != nil { + t.Fatal(err) + } + + // Seek beyond the end. + it = NewIterator(trie.NodeIterator([]byte("z"))) + if err := checkIteratorOrder(nil, it); err != nil { + t.Fatal(err) + } +} + +func checkIteratorOrder(want []kvs, it *Iterator) error { + for it.Next() { + if len(want) == 0 { + return fmt.Errorf("didn't expect any more values, got key %q", it.Key) + } + if !bytes.Equal(it.Key, []byte(want[0].k)) { + return fmt.Errorf("wrong key: got %q, want %q", it.Key, want[0].k) + } + want = want[1:] + } + if len(want) > 0 { + return fmt.Errorf("iterator ended early, want key %q", want[0]) + } + return nil +} + func TestDifferenceIterator(t *testing.T) { triea := newEmpty() for _, val := range testdata1 { @@ -154,7 +199,7 @@ func TestDifferenceIterator(t *testing.T) { trieb.Commit() found := make(map[string]string) - di, _ := NewDifferenceIterator(triea.NodeIterator(), trieb.NodeIterator()) + di, _ := NewDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil)) it := NewIterator(di) for it.Next() { found[string(it.Key)] = string(it.Value) @@ -189,7 +234,7 @@ func TestUnionIterator(t *testing.T) { } trieb.Commit() - di, _ := NewUnionIterator([]NodeIterator{triea.NodeIterator(), trieb.NodeIterator()}) + di, _ := NewUnionIterator([]NodeIterator{triea.NodeIterator(nil), trieb.NodeIterator(nil)}) it := NewIterator(di) all := []struct{ k, v string }{ diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 201716d18..37d1d4b09 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -156,8 +156,10 @@ func (t *SecureTrie) Root() []byte { return t.trie.Root() } -func (t *SecureTrie) NodeIterator() NodeIterator { - return t.trie.NodeIterator() +// NodeIterator returns an iterator that returns nodes of the underlying trie. Iteration +// starts at the key after the given start key. +func (t *SecureTrie) NodeIterator(start []byte) NodeIterator { + return t.trie.NodeIterator(start) } // CommitTo writes all nodes and the secure hash pre-images to the given database. diff --git a/trie/sync_test.go b/trie/sync_test.go index 6d345ad3f..1e27cbb67 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -80,7 +80,7 @@ func checkTrieConsistency(db Database, root common.Hash) error { if err != nil { return nil // // Consider a non existent state consistent } - it := trie.NodeIterator() + it := trie.NodeIterator(nil) for it.Next(true) { } return it.Error() diff --git a/trie/trie.go b/trie/trie.go index dbffc0ac3..5759f97e3 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -125,9 +125,10 @@ func New(root common.Hash, db Database) (*Trie, error) { return trie, nil } -// Iterator returns an iterator over all mappings in the trie. -func (t *Trie) NodeIterator() NodeIterator { - return newNodeIterator(t) +// NodeIterator returns an iterator that returns nodes of the trie. Iteration starts at +// the key after the given start key. +func (t *Trie) NodeIterator(start []byte) NodeIterator { + return newNodeIterator(t, start) } // Get returns the value for key stored in the trie. diff --git a/trie/trie_test.go b/trie/trie_test.go index cacb08824..61adbba0c 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -439,7 +439,7 @@ func runRandTest(rt randTest) bool { tr = newtr case opItercheckhash: checktr, _ := New(common.Hash{}, nil) - it := NewIterator(tr.NodeIterator()) + it := NewIterator(tr.NodeIterator(nil)) for it.Next() { checktr.Update(it.Key, it.Value) }