From 7e389963014ebd175260b7128873a85e5c74bb7b Mon Sep 17 00:00:00 2001
From: Martin Holst Swende
Date: Tue, 5 Nov 2019 19:06:37 +0100
Subject: [PATCH] core/state/snapshot: implement snapshot layer iteration

---
 core/state/snapshot/difflayer.go      | 311 +++++++++++++++++++++
 core/state/snapshot/difflayer_test.go | 380 ++++++++++++++++++++++++++
 core/state/snapshot/iteration.md      |  60 ++++
 3 files changed, 751 insertions(+)
 create mode 100644 core/state/snapshot/iteration.md

diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go
index 05d55a6fa..0d97fbdc8 100644
--- a/core/state/snapshot/difflayer.go
+++ b/core/state/snapshot/difflayer.go
@@ -18,6 +18,7 @@ package snapshot
 
 import (
+	"bytes"
 	"encoding/binary"
 	"fmt"
 	"math"
 	"math/rand"
@@ -475,3 +476,313 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
 	dl.storageList[accountHash] = accountStorageList
 	return accountStorageList
 }
+
+// Iterator walks over a sorted list of hashes, one element per Next call.
+type Iterator interface {
+	// Next steps the iterator forward one element, and returns false if
+	// the iterator is exhausted
+	Next() bool
+	// Key returns the current key
+	Key() common.Hash
+	// Seek steps the iterator forward as many elements as needed, so that after
+	// calling Next(), the iterator will be at a key higher than the given hash
+	Seek(common.Hash)
+}
+
+// newIterator creates an iterator over the accounts of this single diff layer,
+// positioned before the first element.
+func (dl *diffLayer) newIterator() Iterator {
+	// Called for its side effect only: presumably this fills the cached
+	// dl.accountList that dlIterator reads (AccountList is defined elsewhere).
+	dl.AccountList()
+	return &dlIterator{dl, -1}
+}
+
+// dlIterator iterates over the account list of a single diff layer. index is
+// the position of the current element, or -1 before the first call to Next.
+type dlIterator struct {
+	layer *diffLayer
+	index int
+}
+
+// Next advances to the following account hash and reports whether one exists.
+func (it *dlIterator) Next() bool {
+	if it.index < len(it.layer.accountList) {
+		it.index++
+	}
+	return it.index < len(it.layer.accountList)
+}
+
+// Key returns the current account hash, or the zero hash if the iterator is
+// exhausted or has not been advanced with Next yet.
+func (it *dlIterator) Key() common.Hash {
+	// index is -1 until the first Next (and after a Seek), so guard both ends
+	if it.index >= 0 && it.index < len(it.layer.accountList) {
+		return it.layer.accountList[it.index]
+	}
+	return common.Hash{}
+}
+
+// Seek positions the iterator so that the following call to Next lands on the
+// first account hash strictly greater than key.
+func (it *dlIterator) Seek(key common.Hash) {
+	// Search uses binary search to find and return the smallest index i
+	// in [0, n) at which f(i) is true
+	size := len(it.layer.accountList)
+	index := sort.Search(size,
+		func(i int) bool {
+			v := it.layer.accountList[i]
+			return bytes.Compare(key[:], v[:]) < 0
+		})
+	it.index = index - 1
+}
+
+// binaryIterator merges two iterators: a iterates the current layer and b
+// iterates everything below it. aDone and bDone record exhaustion, and k holds
+// the key most recently produced by Next.
+type binaryIterator struct {
+	a     Iterator
+	b     Iterator
+	aDone bool
+	bDone bool
+	k     common.Hash
+}
+
+// newBinaryIterator creates an iterator which recursively pairs this layer's
+// iterator with a binary iterator over its parent diff layers.
+func (dl *diffLayer) newBinaryIterator() Iterator {
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		// parent is the disk layer
+		return dl.newIterator()
+	}
+	l := &binaryIterator{
+		a: dl.newIterator(),
+		b: parent.newBinaryIterator()}
+
+	l.aDone = !l.a.Next()
+	l.bDone = !l.b.Next()
+	return l
+}
+
+// Next emits the smaller of the two pending keys and advances the iterator it
+// came from. A key present in both iterators is emitted only once.
+func (it *binaryIterator) Next() bool {
+	if it.aDone && it.bDone {
+		return false
+	}
+	nextB := it.b.Key()
+first:
+	nextA := it.a.Key()
+	if it.aDone {
+		it.bDone = !it.b.Next()
+		it.k = nextB
+		return true
+	}
+	if it.bDone {
+		it.aDone = !it.a.Next()
+		it.k = nextA
+		return true
+	}
+	if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 {
+		it.aDone = !it.a.Next()
+		it.k = nextA
+		return true
+	} else if diff == 0 {
+		// Now we need to advance one of them
+		it.aDone = !it.a.Next()
+		goto first
+	}
+	it.bDone = !it.b.Next()
+	it.k = nextB
+	return true
+}
+
+// Key returns the key produced by the most recent call to Next.
+func (it *binaryIterator) Key() common.Hash {
+	return it.k
+}
+
+// Seek is not implemented for the binary iterator yet and always panics.
+func (it *binaryIterator) Seek(key common.Hash) {
+	panic("todo: implement")
+}
+
+// iterators returns one single-layer iterator per diff layer, ordered from the
+// bottom-most diff layer up to this one.
+func (dl *diffLayer) iterators() []Iterator {
+	if parent, ok := dl.parent.(*diffLayer); ok {
+		iterators := parent.iterators()
+		return append(iterators, dl.newIterator())
+	}
+	return []Iterator{dl.newIterator()}
+}
+
+// fastIterator is a more optimized multi-layer iterator which maintains a
+// direct mapping of all iterators leading down to the bottom layer
+type fastIterator struct {
+	iterators []Iterator
+	initiated bool
+}
+
+// Len returns the number of active iterators
+func (fi *fastIterator) Len() int {
+	return len(fi.iterators)
+}
+
+// Less implements sort.Interface
+func (fi *fastIterator) Less(i, j int) bool {
+	a := fi.iterators[i].Key()
+	b := fi.iterators[j].Key()
+	return bytes.Compare(a[:], b[:]) < 0
+}
+
+// Swap implements sort.Interface
+func (fi *fastIterator) Swap(i, j int) {
+	fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i]
+}
+
+// Next implements the Iterator interface. It returns false if no more elements
+// can be retrieved (false == exhausted)
+func (fi *fastIterator) Next() bool {
+	if len(fi.iterators) == 0 {
+		return false
+	}
+	if !fi.initiated {
+		// Don't forward first time -- we had to 'Next' once in order to
+		// do the sorting already
+		fi.initiated = true
+		return true
+	}
+	return fi.innerNext(0)
+}
+
+// innerNext handles the next operation internally,
+// and should be invoked when we know that two elements in the list may have
+// the same value.
+// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke
+// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue
+// along the list and apply the same operation if needed
+func (fi *fastIterator) innerNext(pos int) bool {
+	if !fi.iterators[pos].Next() {
+		// Exhausted, remove this iterator
+		fi.remove(pos)
+		return len(fi.iterators) > 0
+	}
+	if pos == len(fi.iterators)-1 {
+		// Last position in the list, so there is no right-hand neighbour to compare
+		return true
+	}
+	// We next:ed the elem at 'pos'. Now we may have to re-sort that elem
+	val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key()
+	diff := bytes.Compare(val[:], neighbour[:])
+	if diff < 0 {
+		// It is still in correct place
+		return true
+	}
+	if diff == 0 {
+		// It has same value as the neighbour. So still in correct place, but
+		// we need to iterate on the neighbour
+		fi.innerNext(pos + 1)
+		return true
+	}
+	// At this point, the elem is in the wrong location, but the
+	// remaining list is sorted. Find out where to move the elem
+	iterationNeeded := false
+	index := sort.Search(len(fi.iterators), func(n int) bool {
+		if n <= pos {
+			// No need to search 'behind' us
+			return false
+		}
+		if n == len(fi.iterators)-1 {
+			// Can always place an elem last
+			return true
+		}
+		neighbour := fi.iterators[n+1].Key()
+		diff := bytes.Compare(val[:], neighbour[:])
+		if diff == 0 {
+			// The elem we're placing it next to has the same value,
+			// so it's going to need further iteration
+			iterationNeeded = true
+		}
+		return diff < 0
+	})
+	fi.move(pos, index)
+	if iterationNeeded {
+		fi.innerNext(index)
+	}
+	return true
+}
+
+// move moves an iterator to another position in the list
+func (fi *fastIterator) move(index, newpos int) {
+	if newpos > len(fi.iterators)-1 {
+		newpos = len(fi.iterators) - 1
+	}
+	var (
+		elem   = fi.iterators[index]
+		middle = fi.iterators[index+1 : newpos+1]
+		suffix []Iterator
+	)
+	if newpos < len(fi.iterators)-1 {
+		suffix = fi.iterators[newpos+1:]
+	}
+	fi.iterators = append(fi.iterators[:index], middle...)
+	fi.iterators = append(fi.iterators, elem)
+	fi.iterators = append(fi.iterators, suffix...)
+}
+
+// remove drops an iterator from the list
+func (fi *fastIterator) remove(index int) {
+	fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...)
+}
+
+// Key returns the current key
+func (fi *fastIterator) Key() common.Hash {
+	return fi.iterators[0].Key()
+}
+
+// Seek positions every underlying iterator past key, drops the ones that are
+// exhausted, and re-sorts the remainder. The following call to Next yields the
+// smallest key strictly greater than key.
+func (fi *fastIterator) Seek(key common.Hash) {
+	var (
+		seen = make(map[common.Hash]struct{})
+		live = fi.iterators[:0] // filtered in place; writes never pass the read index
+	)
+	// We need to apply this across all iterators
+	for _, it := range fi.iterators {
+		it.Seek(key)
+		// Advance until this iterator sits on a key no earlier iterator has
+		// claimed, so that every key in the list is unique. An iterator which
+		// runs dry in the process is simply not carried over into live.
+		for it.Next() {
+			v := it.Key()
+			if _, exist := seen[v]; !exist {
+				seen[v] = struct{}{}
+				live = append(live, it)
+				break
+			}
+		}
+	}
+	fi.iterators = live
+	// The list is now totally unsorted, need to re-sort the entire list
+	sort.Sort(fi)
+	fi.initiated = false
+}
+
+// newFastIterator creates a fast iterator over this layer and all parent diff
+// layers. The fast iterator does not query parents as much.
+func (dl *diffLayer) newFastIterator() Iterator {
+	f := &fastIterator{dl.iterators(), false}
+	f.Seek(common.Hash{})
+	return f
+}
+
+// Debug is a convenience helper during testing
+func (fi *fastIterator) Debug() {
+	for _, it := range fi.iterators {
+		fmt.Printf(" %v ", it.Key()[31])
+	}
+	fmt.Println()
+}
diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go
index 7d7b21eb0..5f914f626 100644
--- a/core/state/snapshot/difflayer_test.go
+++ b/core/state/snapshot/difflayer_test.go
@@ -18,6 +18,7 @@ package snapshot
 
 import (
 	"bytes"
+	"encoding/binary"
 	"math/big"
 	"math/rand"
 	"testing"
@@ -347,3 +348,382 @@ func BenchmarkJournal(b *testing.B) {
 		layer.Journal(new(bytes.Buffer))
 	}
 }
+
+// TestIteratorBasics tests some simple single-layer iteration
+func TestIteratorBasics(t *testing.T) {
+	var (
+		accounts = make(map[common.Hash][]byte)
+		storage  = make(map[common.Hash]map[common.Hash][]byte)
+	)
+	// Fill up a parent
+	for i := 0; i < 100; i++ {
+		h := randomHash()
+		data := randomAccount()
+		accounts[h] = data
+		if rand.Intn(20) < 10 {
+			accStorage := make(map[common.Hash][]byte)
+			value := make([]byte, 32)
+			rand.Read(value)
+			accStorage[randomHash()] = value
+			storage[h] = accStorage
+		}
+	}
+	// Wrap the accounts in a single diff layer and iterate it
+	parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage)
+	it := parent.newIterator()
+	verifyIterator(t, 100, it)
+}
+
+// testIterator is a minimal Iterator over single-byte keys, used to drive the
+// fastIterator directly. The current key is values[0].
+type testIterator struct {
+	values []byte
+}
+
+func newTestIterator(values ...byte) *testIterator {
+	return &testIterator{values}
+}
+
+// Next drops the current key and reports whether another one is available.
+func (ti *testIterator) Next() bool {
+	if len(ti.values) == 0 {
+		// Already exhausted; slicing below would panic
+		return false
+	}
+	ti.values = ti.values[1:]
+	return len(ti.values) > 0
+}
+
+func (ti *testIterator) Key() common.Hash {
+	return common.BytesToHash([]byte{ti.values[0]})
+}
+
+func (ti *testIterator) Seek(common.Hash) {
+	panic("implement me")
+}
+
+// TestFastIteratorBasics feeds hand-written key lists straight into a
+// fastIterator and checks that the merged, de-duplicated output matches.
+func TestFastIteratorBasics(t *testing.T) {
+	type testCase struct {
+		lists   [][]byte
+		expKeys []byte
+	}
+	for i, tc := range []testCase{
+		{lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4},
+			{7, 14, 15}, {9, 13, 15, 16}},
+			expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}},
+		{lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9},
+			{9, 10}, {10, 13, 15, 16}},
+			expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
+	} {
+		var iterators []Iterator
+		for _, data := range tc.lists {
+			iterators = append(iterators, newTestIterator(data...))
+		}
+		fi := &fastIterator{
+			iterators: iterators,
+			initiated: false,
+		}
+		count := 0
+		for fi.Next() {
+			if count >= len(tc.expKeys) {
+				t.Fatalf("tc %d: iterator returned more than the expected %d keys", i, len(tc.expKeys))
+			}
+			if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got {
+				t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp)
+			}
+			count++
+		}
+		if count != len(tc.expKeys) {
+			t.Errorf("tc %d: iterator len wrong, expected %d, got %d", i, len(tc.expKeys), count)
+		}
+	}
+}
+
+// verifyIterator drains it and checks that the keys come out in strictly
+// ascending order and that exactly expCount of them are produced.
+func verifyIterator(t *testing.T, expCount int, it Iterator) {
+	var (
+		i    = 0
+		last = common.Hash{}
+	)
+	for it.Next() {
+		v := it.Key()
+		if bytes.Compare(last[:], v[:]) >= 0 {
+			t.Errorf("Wrong order:\n%x \n>=\n%x", last, v)
+		}
+		last = v
+		i++
+	}
+	if i != expCount {
+		t.Errorf("iterator len wrong, expected %d, got %d", expCount, i)
+	}
+}
+
+// TestIteratorTraversal tests some simple multi-layer iteration
+func TestIteratorTraversal(t *testing.T) {
+	var (
+		storage = make(map[common.Hash]map[common.Hash][]byte)
+	)
+
+	mkAccounts := func(args ...string) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for _, h := range args {
+			accounts[common.HexToHash(h)] = randomAccount()
+		}
+		return accounts
+	}
+	// entries in multiple layers should only become output once
+	parent := newDiffLayer(emptyLayer{}, common.Hash{},
+		mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage)
+
+	child := parent.Update(common.Hash{},
+		mkAccounts("0xbb", "0xdd", "0xf0"), storage)
+
+	child = child.Update(common.Hash{},
+		mkAccounts("0xcc", "0xf0", "0xff"), storage)
+
+	// single layer iterator
+	verifyIterator(t, 3, child.newIterator())
+	// multi-layered binary iterator
+	verifyIterator(t, 7, child.newBinaryIterator())
+	// multi-layered fast iterator
+	verifyIterator(t, 7, child.newFastIterator())
+}
+
+// TestIteratorLargeTraversal tests iteration over a stack of 100 diff layers.
+func TestIteratorLargeTraversal(t *testing.T) {
+	// This testcase is a bit notorious -- all layers contain the exact
+	// same 200 accounts.
+	var storage = make(map[common.Hash]map[common.Hash][]byte)
+	mkAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+	parent := newDiffLayer(emptyLayer{}, common.Hash{},
+		mkAccounts(200), storage)
+	child := parent.Update(common.Hash{},
+		mkAccounts(200), storage)
+	for i := 2; i < 100; i++ {
+		child = child.Update(common.Hash{},
+			mkAccounts(200), storage)
+	}
+	// single layer iterator
+	verifyIterator(t, 200, child.newIterator())
+	// multi-layered binary iterator
+	verifyIterator(t, 200, child.newBinaryIterator())
+	// multi-layered fast iterator
+	verifyIterator(t, 200, child.newFastIterator())
+}
+
+// BenchmarkIteratorTraversal is a bit notorious -- all 100 layers contain the exact
+// same 200 accounts. That means that we need to process 20000 items, but only
+// spit out 200 values eventually.
+//
+// BenchmarkIteratorTraversal/binary_iterator-6    2008    573290 ns/op     9520 B/op    199 allocs/op
+// BenchmarkIteratorTraversal/fast_iterator-6      1946    575596 ns/op    20146 B/op    134 allocs/op
+func BenchmarkIteratorTraversal(b *testing.B) {
+
+	var storage = make(map[common.Hash]map[common.Hash][]byte)
+
+	mkAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+	parent := newDiffLayer(emptyLayer{}, common.Hash{},
+		mkAccounts(200), storage)
+
+	child := parent.Update(common.Hash{},
+		mkAccounts(200), storage)
+
+	for i := 2; i < 100; i++ {
+		child = child.Update(common.Hash{},
+			mkAccounts(200), storage)
+
+	}
+	// We call this once before the benchmark, so the creation of
+	// sorted accountlists are not included in the results.
+	child.newBinaryIterator()
+	b.Run("binary iterator", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newBinaryIterator()
+			for it.Next() {
+				got++
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newFastIterator()
+			for it.Next() {
+				got++
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+}
+
+// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where
+// the baselayer is a lot larger than the upper layer.
+//
+// This is heavy on the binary iterator, which in most cases will have to
+// call recursively 100 times for the majority of the values
+//
+// BenchmarkIteratorLargeBaselayer/binary_iterator-6      585    2067377 ns/op    9520 B/op    199 allocs/op
+// BenchmarkIteratorLargeBaselayer/fast_iterator-6      13198      91043 ns/op    8601 B/op    118 allocs/op
+func BenchmarkIteratorLargeBaselayer(b *testing.B) {
+	var storage = make(map[common.Hash]map[common.Hash][]byte)
+
+	mkAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+
+	parent := newDiffLayer(emptyLayer{}, common.Hash{},
+		mkAccounts(2000), storage)
+
+	child := parent.Update(common.Hash{},
+		mkAccounts(20), storage)
+
+	for i := 2; i < 100; i++ {
+		child = child.Update(common.Hash{},
+			mkAccounts(20), storage)
+
+	}
+	// We call this once before the benchmark, so the creation of
+	// sorted accountlists are not included in the results.
+	child.newBinaryIterator()
+	b.Run("binary iterator", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newBinaryIterator()
+			for it.Next() {
+				got++
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newFastIterator()
+			for it.Next() {
+				got++
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+}
+
+// TestIteratorFlattning tests what happens when we
+// - have a live iterator on child C (parent C1 -> C2 .. CN)
+// - flattens C2 all the way into CN
+// - continues iterating
+// Right now, this "works" simply because the keys do not change -- the
+// iterator is not aware that a layer has become stale. This naive
+// solution probably won't work in the long run, however
+func TestIteratorFlattning(t *testing.T) {
+	var (
+		storage = make(map[common.Hash]map[common.Hash][]byte)
+	)
+	mkAccounts := func(args ...string) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for _, h := range args {
+			accounts[common.HexToHash(h)] = randomAccount()
+		}
+		return accounts
+	}
+	// entries in multiple layers should only become output once
+	parent := newDiffLayer(emptyLayer{}, common.Hash{},
+		mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage)
+
+	child := parent.Update(common.Hash{},
+		mkAccounts("0xbb", "0xdd", "0xf0"), storage)
+
+	child = child.Update(common.Hash{},
+		mkAccounts("0xcc", "0xf0", "0xff"), storage)
+
+	it := child.newFastIterator()
+	child.parent.(*diffLayer).flatten()
+	// The parent should now be stale
+	verifyIterator(t, 7, it)
+}
+
+// TestIteratorSeek tests that Seek on the single-layer and fast iterators
+// resumes at the first key strictly greater than the seek target.
+func TestIteratorSeek(t *testing.T) {
+	storage := make(map[common.Hash]map[common.Hash][]byte)
+	mkAccounts := func(args ...string) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for _, h := range args {
+			accounts[common.HexToHash(h)] = randomAccount()
+		}
+		return accounts
+	}
+	parent := newDiffLayer(emptyLayer{}, common.Hash{},
+		mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage)
+	it := parent.newIterator()
+	// expected: ee, f0, ff
+	it.Seek(common.HexToHash("0xdd"))
+	verifyIterator(t, 3, it)
+
+	it = parent.newIterator().(*dlIterator)
+	// expected: ee, f0, ff
+	it.Seek(common.HexToHash("0xaa"))
+	verifyIterator(t, 3, it)
+
+	it = parent.newIterator().(*dlIterator)
+	// expected: nothing
+	it.Seek(common.HexToHash("0xff"))
+	verifyIterator(t, 0, it)
+
+	child := parent.Update(common.Hash{},
+		mkAccounts("0xbb", "0xdd", "0xf0"), storage)
+
+	child = child.Update(common.Hash{},
+		mkAccounts("0xcc", "0xf0", "0xff"), storage)
+
+	it = child.newFastIterator()
+	// expected: cc, dd, ee, f0, ff
+	it.Seek(common.HexToHash("0xbb"))
+	verifyIterator(t, 5, it)
+
+	it = child.newFastIterator()
+	it.Seek(common.HexToHash("0xef"))
+	// exp: f0, ff
+	verifyIterator(t, 2, it)
+
+	it = child.newFastIterator()
+	it.Seek(common.HexToHash("0xf0"))
+	verifyIterator(t, 1, it)
+
+	it.Seek(common.HexToHash("0xff"))
+	verifyIterator(t, 0, it)
+}
diff --git a/core/state/snapshot/iteration.md b/core/state/snapshot/iteration.md
new file mode 100644
index 000000000..ca1962d42
--- /dev/null
+++ b/core/state/snapshot/iteration.md
@@ -0,0 +1,60 @@
+
+## How the fast iterator works
+
+Consider the following example, where we have `6` iterators, sorted from
+left to right in ascending order.
+
+Our 'primary' `A` iterator is on the left, containing the elements `[0,1,8]`
+```
+ A    B    C    D    E    F
+
+ 0    1    2    4    7    9
+ 1    2    9    -    14   13
+ 8    8    -         15   15
+ -    -              -    16
+                          -
+```
+When we call `Next` on the primary iterator, we get (ignoring the future keys)
+
+```
+A    B    C    D    E    F
+
+1    1    2    4    7    9
+```
+We detect that we now got an equality between our element and the next element.
+And we need to continue `Next`ing on the next element
+
+```
+1    2    2    4    7    9
+```
+And move on:
+```
+A    B    C    D    E    F
+
+1    2    9    4    7    9
+```
+Now we broke out of the equality, but we need to re-sort the element `C`
+
+```
+A    B    D    E    F    C
+
+1    2    4    7    9    9
+```
+
+And after shifting it rightwards, we check equality again, and find `C == F`, and thus
+call `Next` on `C`
+
+```
+A    B    D    E    F    C
+
+1    2    4    7    9    -
+```
+At this point, `C` was exhausted, and is removed
+
+```
+A    B    D    E    F
+
+1    2    4    7    9
+```
+And we're done with this step.
+