core/state/snapshot: implement snapshot layer iteration

Martin Holst Swende 2019-11-05 19:06:37 +01:00 committed by Péter Szilágyi
parent 22c494d399
commit 7e38996301
3 changed files with 712 additions and 0 deletions

View File

@@ -18,6 +18,7 @@ package snapshot
import (
"encoding/binary"
"bytes"
"fmt"
"math"
"math/rand"
@@ -475,3 +476,291 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
dl.storageList[accountHash] = accountStorageList
return accountStorageList
}
type Iterator interface {
// Next steps the iterator forward one element, and returns false if
// the iterator is exhausted
Next() bool
// Key returns the current key
Key() common.Hash
// Seek steps the iterator forward as many elements as needed, so that after
// calling Next(), the iterator will be at a key higher than the given hash
Seek(common.Hash)
}
// newIterator creates an iterator over the sorted account list of this
// single diff layer.
func (dl *diffLayer) newIterator() Iterator {
dl.AccountList()
return &dlIterator{dl, -1}
}
type dlIterator struct {
layer *diffLayer
index int
}
func (it *dlIterator) Next() bool {
if it.index < len(it.layer.accountList) {
it.index++
}
return it.index < len(it.layer.accountList)
}
func (it *dlIterator) Key() common.Hash {
if it.index >= 0 && it.index < len(it.layer.accountList) {
return it.layer.accountList[it.index]
}
return common.Hash{}
}
func (it *dlIterator) Seek(key common.Hash) {
// Search uses binary search to find and return the smallest index i
// in [0, n) at which f(i) is true
size := len(it.layer.accountList)
index := sort.Search(size,
func(i int) bool {
v := it.layer.accountList[i]
return bytes.Compare(key[:], v[:]) < 0
})
it.index = index - 1
}
// binaryIterator is a simplistic iterator that merges a diff layer with its
// parents by recursively pairing each layer's iterator with the merged
// iterator of everything below it.
type binaryIterator struct {
a Iterator
b Iterator
aDone bool
bDone bool
k common.Hash
}
func (dl *diffLayer) newBinaryIterator() Iterator {
parent, ok := dl.parent.(*diffLayer)
if !ok {
// parent is the disk layer
return dl.newIterator()
}
l := &binaryIterator{
a: dl.newIterator(),
b: parent.newBinaryIterator()}
l.aDone = !l.a.Next()
l.bDone = !l.b.Next()
return l
}
func (it *binaryIterator) Next() bool {
if it.aDone && it.bDone {
return false
}
nextB := it.b.Key()
first:
nextA := it.a.Key()
if it.aDone {
it.bDone = !it.b.Next()
it.k = nextB
return true
}
if it.bDone {
it.aDone = !it.a.Next()
it.k = nextA
return true
}
if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 {
it.aDone = !it.a.Next()
it.k = nextA
return true
} else if diff == 0 {
// Now we need to advance one of them
it.aDone = !it.a.Next()
goto first
}
it.bDone = !it.b.Next()
it.k = nextB
return true
}
func (it *binaryIterator) Key() common.Hash {
return it.k
}
func (it *binaryIterator) Seek(key common.Hash) {
panic("todo: implement")
}
// iterators collects one iterator per diff layer in the chain, parents first.
func (dl *diffLayer) iterators() []Iterator {
if parent, ok := dl.parent.(*diffLayer); ok {
iterators := parent.iterators()
return append(iterators, dl.newIterator())
}
return []Iterator{dl.newIterator()}
}
// fastIterator is a more optimized multi-layer iterator which maintains a
// direct mapping of all iterators leading down to the bottom layer
type fastIterator struct {
iterators []Iterator
initiated bool
}
// Len returns the number of active iterators
func (fi *fastIterator) Len() int {
return len(fi.iterators)
}
// Less implements sort.Interface
func (fi *fastIterator) Less(i, j int) bool {
a := fi.iterators[i].Key()
b := fi.iterators[j].Key()
return bytes.Compare(a[:], b[:]) < 0
}
// Swap implements sort.Interface
func (fi *fastIterator) Swap(i, j int) {
fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i]
}
// Next implements the Iterator interface. It returns false if no more elements
// can be retrieved (false == exhausted).
func (fi *fastIterator) Next() bool {
if len(fi.iterators) == 0 {
return false
}
if !fi.initiated {
// Don't forward first time -- we had to 'Next' once in order to
// do the sorting already
fi.initiated = true
return true
}
return fi.innerNext(0)
}
// innerNext handles the next operation internally,
// and should be invoked when we know that two elements in the list may have
// the same value.
// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke
// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue
// along the list and apply the same operation if needed
func (fi *fastIterator) innerNext(pos int) bool {
if !fi.iterators[pos].Next() {
// Exhausted, remove this iterator
fi.remove(pos)
if len(fi.iterators) == 0 {
return false
}
return true
}
if pos == len(fi.iterators)-1 {
// This is the last iterator in the list, there is no neighbour to re-sort against
return true
}
// We advanced the element at 'pos'; it may now need to be re-sorted into the remainder of the list
val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key()
diff := bytes.Compare(val[:], neighbour[:])
if diff < 0 {
// It is still in correct place
return true
}
if diff == 0 {
// It has same value as the neighbour. So still in correct place, but
// we need to iterate on the neighbour
fi.innerNext(pos + 1)
return true
}
// At this point, the elem is in the wrong location, but the
// remaining list is sorted. Find out where to move the elem
iterationNeeded := false
index := sort.Search(len(fi.iterators), func(n int) bool {
if n <= pos {
// No need to search 'behind' us
return false
}
if n == len(fi.iterators)-1 {
// Can always place an elem last
return true
}
neighbour := fi.iterators[n+1].Key()
diff := bytes.Compare(val[:], neighbour[:])
if diff == 0 {
// The elem we're placing it next to has the same value,
// so it's going to need further iteration
iterationNeeded = true
}
return diff < 0
})
fi.move(pos, index)
if iterationNeeded {
fi.innerNext(index)
}
return true
}
// move moves an iterator to another position in the list
func (fi *fastIterator) move(index, newpos int) {
if newpos > len(fi.iterators)-1 {
newpos = len(fi.iterators) - 1
}
var (
elem = fi.iterators[index]
middle = fi.iterators[index+1 : newpos+1]
suffix []Iterator
)
if newpos < len(fi.iterators)-1 {
suffix = fi.iterators[newpos+1:]
}
fi.iterators = append(fi.iterators[:index], middle...)
fi.iterators = append(fi.iterators, elem)
fi.iterators = append(fi.iterators, suffix...)
}
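As an aside, the combination of the search in `innerNext` and the relocation in `move` can be illustrated on a plain int slice. The following is a minimal standalone sketch (not part of this file; `reposition` and the sample values are illustrative only) that finds the new slot with `sort.Search` and rotates the advanced element into it:

```go
package main

import (
	"fmt"
	"sort"
)

// reposition moves vals[pos] rightwards into its sorted place, assuming the
// rest of the slice is already sorted -- the same situation innerNext/move
// handle. (The duplicate-key case, which innerNext resolves by advancing the
// neighbour, is omitted here.)
func reposition(vals []int, pos int) {
	// Nothing to do if the element is last or already below its neighbour.
	if pos == len(vals)-1 || vals[pos] < vals[pos+1] {
		return
	}
	val := vals[pos]
	idx := sort.Search(len(vals), func(n int) bool {
		if n <= pos {
			return false // never move the element backwards
		}
		if n == len(vals)-1 {
			return true // it can always be placed last
		}
		return val < vals[n+1] // first slot whose right neighbour is larger
	})
	// Shift the intervening elements left and drop val into its new slot.
	copy(vals[pos:], vals[pos+1:idx+1])
	vals[idx] = val
}

func main() {
	vals := []int{9, 2, 3, 5, 8, 10} // element 0 was just advanced to 9
	reposition(vals, 0)
	fmt.Println(vals) // [2 3 5 8 9 10]
}
```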
// remove drops an iterator from the list
func (fi *fastIterator) remove(index int) {
fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...)
}
// Key returns the current key
func (fi *fastIterator) Key() common.Hash {
return fi.iterators[0].Key()
}
// Seek implements the Iterator interface, moving every sub-iterator past the
// given key and re-sorting the whole list.
func (fi *fastIterator) Seek(key common.Hash) {
// We need to apply this across all iterators
var seen = make(map[common.Hash]struct{})
length := len(fi.iterators)
for i, it := range fi.iterators {
it.Seek(key)
for {
if !it.Next() {
// Exhausted: swap it to the end of the list so it can be dropped afterwards
fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i]
length--
break
}
v := it.Key()
if _, exist := seen[v]; !exist {
seen[v] = struct{}{}
break
}
}
}
// Now drop the exhausted iterators that were moved to the end
fi.iterators = fi.iterators[:length]
// The list is now totally unsorted, need to re-sort the entire list
sort.Sort(fi)
fi.initiated = false
}
// newFastIterator creates a merging iterator over this layer and all its
// parents. Unlike the binary iterator, it keeps one live iterator per layer
// and does not recurse into the parents on every Next.
func (dl *diffLayer) newFastIterator() Iterator {
f := &fastIterator{dl.iterators(), false}
f.Seek(common.Hash{})
return f
}
// Debug is a convenience helper during testing
func (fi *fastIterator) Debug() {
for _, it := range fi.iterators {
fmt.Printf(" %v ", it.Key()[31])
}
fmt.Println()
}
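Taken together, here is a minimal sketch of how these iterators might be exercised from inside the snapshot package. It assumes the helpers from the accompanying test file (emptyLayer, randomAccount) are in scope; the test name is illustrative only:

```go
package snapshot

import (
	"testing"

	"github.com/ethereum/go-ethereum/common"
)

// Sketch only: builds a tiny two-layer stack and walks it with the fast
// iterator. Assumes emptyLayer and randomAccount from the test file.
func TestFastIteratorSketch(t *testing.T) {
	mkAccounts := func(hashes ...string) map[common.Hash][]byte {
		accounts := make(map[common.Hash][]byte)
		for _, h := range hashes {
			accounts[common.HexToHash(h)] = randomAccount()
		}
		return accounts
	}
	storage := make(map[common.Hash]map[common.Hash][]byte)

	parent := newDiffLayer(emptyLayer{}, common.Hash{},
		mkAccounts("0xaa", "0xee", "0xff"), storage)
	child := parent.Update(common.Hash{}, mkAccounts("0xbb", "0xee"), storage)

	it := child.newFastIterator()
	// Seek positions the iterator so that it yields keys strictly greater
	// than the given hash: here 0xbb..., 0xee..., 0xff... (0xee only once).
	it.Seek(common.HexToHash("0xaa"))
	for it.Next() {
		t.Logf("account %x", it.Key())
	}
}
```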

View File

@@ -18,6 +18,7 @@ package snapshot
import (
"bytes"
"encoding/binary"
"math/big"
"math/rand"
"testing"
@@ -347,3 +348,365 @@ func BenchmarkJournal(b *testing.B) {
layer.Journal(new(bytes.Buffer))
}
}
// TestIteratorBasics tests some simple single-layer iteration
func TestIteratorBasics(t *testing.T) {
var (
accounts = make(map[common.Hash][]byte)
storage = make(map[common.Hash]map[common.Hash][]byte)
)
// Fill up a parent
for i := 0; i < 100; i++ {
h := randomHash()
data := randomAccount()
accounts[h] = data
if rand.Intn(20) < 10 {
accStorage := make(map[common.Hash][]byte)
value := make([]byte, 32)
rand.Read(value)
accStorage[randomHash()] = value
storage[h] = accStorage
}
}
// Build a single diff layer from the generated accounts
parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage)
it := parent.newIterator()
verifyIterator(t, 100, it)
}
// testIterator is a pre-positioned iterator over a list of single-byte keys,
// used to drive the fastIterator directly in tests.
type testIterator struct {
values []byte
}
func newTestIterator(values ...byte) *testIterator {
return &testIterator{values}
}
func (ti *testIterator) Next() bool {
ti.values = ti.values[1:]
if len(ti.values) == 0 {
return false
}
return true
}
func (ti *testIterator) Key() common.Hash {
return common.BytesToHash([]byte{ti.values[0]})
}
func (ti *testIterator) Seek(common.Hash) {
panic("implement me")
}
func TestFastIteratorBasics(t *testing.T) {
type testCase struct {
lists [][]byte
expKeys []byte
}
for i, tc := range []testCase{
{lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4},
{7, 14, 15}, {9, 13, 15, 16}},
expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}},
{lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9},
{9, 10}, {10, 13, 15, 16}},
expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
} {
var iterators []Iterator
for _, data := range tc.lists {
iterators = append(iterators, newTestIterator(data...))
}
fi := &fastIterator{
iterators: iterators,
initiated: false,
}
count := 0
for fi.Next() {
if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got {
t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp)
}
count++
}
}
}
func verifyIterator(t *testing.T, expCount int, it Iterator) {
var (
i = 0
last = common.Hash{}
)
for it.Next() {
v := it.Key()
if bytes.Compare(last[:], v[:]) >= 0 {
t.Errorf("Wrong order:\n%x \n>=\n%x", last, v)
}
i++
}
if i != expCount {
t.Errorf("iterator len wrong, expected %d, got %d", expCount, i)
}
}
// TestIteratorTraversal tests some simple multi-layer iteration
func TestIteratorTraversal(t *testing.T) {
var (
storage = make(map[common.Hash]map[common.Hash][]byte)
)
mkAccounts := func(args ...string) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for _, h := range args {
accounts[common.HexToHash(h)] = randomAccount()
}
return accounts
}
// Entries that appear in multiple layers should only be emitted once
parent := newDiffLayer(emptyLayer{}, common.Hash{},
mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage)
child := parent.Update(common.Hash{},
mkAccounts("0xbb", "0xdd", "0xf0"), storage)
child = child.Update(common.Hash{},
mkAccounts("0xcc", "0xf0", "0xff"), storage)
// single layer iterator
verifyIterator(t, 3, child.newIterator())
// multi-layered binary iterator
verifyIterator(t, 7, child.newBinaryIterator())
// multi-layered fast iterator
verifyIterator(t, 7, child.newFastIterator())
}
func TestIteratorLargeTraversal(t *testing.T) {
// This testcase is a bit notorious -- all layers contain the exact
// same 200 accounts.
var storage = make(map[common.Hash]map[common.Hash][]byte)
mkAccounts := func(num int) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for i := 0; i < num; i++ {
h := common.Hash{}
binary.BigEndian.PutUint64(h[:], uint64(i+1))
accounts[h] = randomAccount()
}
return accounts
}
parent := newDiffLayer(emptyLayer{}, common.Hash{},
mkAccounts(200), storage)
child := parent.Update(common.Hash{},
mkAccounts(200), storage)
for i := 2; i < 100; i++ {
child = child.Update(common.Hash{},
mkAccounts(200), storage)
}
// single layer iterator
verifyIterator(t, 200, child.newIterator())
// multi-layered binary iterator
verifyIterator(t, 200, child.newBinaryIterator())
// multi-layered fast iterator
verifyIterator(t, 200, child.newFastIterator())
}
// BenchmarkIteratorTraversal is a bit notorious -- all layers contain the exact
// same 200 accounts. That means we need to process roughly 20,000 items (100
// layers of 200 accounts each), but only spit out 200 values eventually.
//
//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op
//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op
func BenchmarkIteratorTraversal(b *testing.B) {
var storage = make(map[common.Hash]map[common.Hash][]byte)
mkAccounts := func(num int) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for i := 0; i < num; i++ {
h := common.Hash{}
binary.BigEndian.PutUint64(h[:], uint64(i+1))
accounts[h] = randomAccount()
}
return accounts
}
parent := newDiffLayer(emptyLayer{}, common.Hash{},
mkAccounts(200), storage)
child := parent.Update(common.Hash{},
mkAccounts(200), storage)
for i := 2; i < 100; i++ {
child = child.Update(common.Hash{},
mkAccounts(200), storage)
}
// We call this once before the benchmark, so the creation of
// sorted account lists is not included in the results.
child.newBinaryIterator()
b.Run("binary iterator", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newBinaryIterator()
for it.Next() {
got++
}
if exp := 200; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
b.Run("fast iterator", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newFastIterator()
for it.Next() {
got++
}
if exp := 200; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
}
// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where
// the baselayer is a lot larger than the upper layers.
//
// This is heavy on the binary iterator, which in most cases will have to
// call recursively 100 times for the majority of the values
//
// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op
// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op
func BenchmarkIteratorLargeBaselayer(b *testing.B) {
var storage = make(map[common.Hash]map[common.Hash][]byte)
mkAccounts := func(num int) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for i := 0; i < num; i++ {
h := common.Hash{}
binary.BigEndian.PutUint64(h[:], uint64(i+1))
accounts[h] = randomAccount()
}
return accounts
}
parent := newDiffLayer(emptyLayer{}, common.Hash{},
mkAccounts(2000), storage)
child := parent.Update(common.Hash{},
mkAccounts(20), storage)
for i := 2; i < 100; i++ {
child = child.Update(common.Hash{},
mkAccounts(20), storage)
}
// We call this once before the benchmark, so the creation of
// sorted account lists is not included in the results.
child.newBinaryIterator()
b.Run("binary iterator", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newBinaryIterator()
for it.Next() {
got++
}
if exp := 2000; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
b.Run("fast iterator", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newFastIterator()
for it.Next() {
got++
}
if exp := 2000; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
}
// TestIteratorFlattening tests what happens when we
// - have a live iterator on child C (parent C1 -> C2 .. CN)
// - flattens C2 all the way into CN
// - continues iterating
// Right now, this "works" simply because the keys do not change -- the
// iterator is not aware that a layer has become stale. This naive
// solution probably won't work in the long run, however
func TestIteratorFlattening(t *testing.T) {
var (
storage = make(map[common.Hash]map[common.Hash][]byte)
)
mkAccounts := func(args ...string) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for _, h := range args {
accounts[common.HexToHash(h)] = randomAccount()
}
return accounts
}
// Entries that appear in multiple layers should only be emitted once
parent := newDiffLayer(emptyLayer{}, common.Hash{},
mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage)
child := parent.Update(common.Hash{},
mkAccounts("0xbb", "0xdd", "0xf0"), storage)
child = child.Update(common.Hash{},
mkAccounts("0xcc", "0xf0", "0xff"), storage)
it := child.newFastIterator()
child.parent.(*diffLayer).flatten()
// The parent should now be stale
verifyIterator(t, 7, it)
}
func TestIteratorSeek(t *testing.T) {
storage := make(map[common.Hash]map[common.Hash][]byte)
mkAccounts := func(args ...string) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for _, h := range args {
accounts[common.HexToHash(h)] = randomAccount()
}
return accounts
}
parent := newDiffLayer(emptyLayer{}, common.Hash{},
mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage)
it := parent.newIterator()
// expected: ee, f0, ff
it.Seek(common.HexToHash("0xdd"))
verifyIterator(t, 3, it)
it = parent.newIterator().(*dlIterator)
// expected: ee, f0, ff
it.Seek(common.HexToHash("0xaa"))
verifyIterator(t, 3, it)
it = parent.newIterator().(*dlIterator)
// expected: nothing
it.Seek(common.HexToHash("0xff"))
verifyIterator(t, 0, it)
child := parent.Update(common.Hash{},
mkAccounts("0xbb", "0xdd", "0xf0"), storage)
child = child.Update(common.Hash{},
mkAccounts("0xcc", "0xf0", "0xff"), storage)
it = child.newFastIterator()
// expected: cc, dd, ee, f0, ff
it.Seek(common.HexToHash("0xbb"))
verifyIterator(t, 5, it)
it = child.newFastIterator()
it.Seek(common.HexToHash("0xef"))
// exp: f0, ff
verifyIterator(t, 2, it)
it = child.newFastIterator()
it.Seek(common.HexToHash("0xf0"))
verifyIterator(t, 1, it)
it.Seek(common.HexToHash("0xff"))
verifyIterator(t, 0, it)
}

View File

@@ -0,0 +1,60 @@
## How the fast iterator works
Consider the following example, where we have `6` iterators, sorted from
left to right in ascending order by their current key.
Our 'primary' iterator `A` is on the left, containing the elements `[0,1,8]`:
```
A  B  C  D  E   F
0  1  2  4  7   9
1  2  9  -  14  13
8  8  -  -  15  15
-  -  -  -  -   16
```
When we call `Next` on the primary iterator, we get (ignoring the future keys)
```
A B C D E F
1 1 2 4 7 9
```
We detect that our element is now equal to its right-hand neighbour, so we
have to `Next` the neighbour as well:
```
A B C D E F
1 2 2 4 7 9
```
And move on:
```
A B C D E F
1 2 9 4 7 9
```
Now we have broken out of the equality, but `C` is out of order and needs to be re-sorted into place:
```
A B D E F C
1 2 4 7 9 9
```
And after shifting it rightwards, we check equality again, and find `C == F`, and thus
call `Next` on `C`
```
A B D E F C
1 2 4 7 9 -
```
At this point `C` is exhausted and is removed from the list:
```
A B D E F
1 2 4 7 9
```
And we're done with this step.
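
This walkthrough corresponds to the first case of `TestFastIteratorBasics`. A minimal sketch of replaying it directly against the fast iterator, assuming the `newTestIterator` helper from the test file is in scope (the test name is illustrative only):

```go
package snapshot

import "testing"

// Sketch only: the six lists are the columns A..F from the example above;
// the merged, de-duplicated output should be 0,1,2,4,7,8,9,13,14,15,16.
func TestFastIteratorWalkthrough(t *testing.T) {
	lists := [][]byte{
		{0, 1, 8},       // A
		{1, 2, 8},       // B
		{2, 9},          // C
		{4},             // D
		{7, 14, 15},     // E
		{9, 13, 15, 16}, // F
	}
	var iterators []Iterator
	for _, data := range lists {
		iterators = append(iterators, newTestIterator(data...))
	}
	fi := &fastIterator{iterators: iterators, initiated: false}

	exp := []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}
	count := 0
	for fi.Next() {
		if count >= len(exp) {
			t.Fatalf("too many elements, got key %d", fi.Key()[31])
		}
		if got := fi.Key()[31]; got != exp[count] {
			t.Errorf("step %d: got %d, want %d", count, got, exp[count])
		}
		count++
	}
	if count != len(exp) {
		t.Errorf("got %d elements, want %d", count, len(exp))
	}
}
```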