core/state/snapshot: move iterator out into its own files

This commit is contained in:
Péter Szilágyi 2019-12-05 15:37:25 +02:00
parent 7e38996301
commit e567675473
No known key found for this signature in database
GPG Key ID: E9AE538CEDF8293D
6 changed files with 838 additions and 652 deletions

View File

@ -18,7 +18,6 @@ package snapshot
import ( import (
"encoding/binary" "encoding/binary"
"bytes"
"fmt" "fmt"
"math" "math"
"math/rand" "math/rand"
@ -476,291 +475,3 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
dl.storageList[accountHash] = accountStorageList dl.storageList[accountHash] = accountStorageList
return accountStorageList return accountStorageList
} }
// Iterator is the interface implemented by the iterators in this file
// (dlIterator, binaryIterator and fastIterator) to step over hash keys.
type Iterator interface {
	// Next steps the iterator forward one element, and returns false if
	// the iterator is exhausted.
	Next() bool
	// Key returns the current key.
	Key() common.Hash
	// Seek steps the iterator forward as many elements as needed, so that after
	// calling Next(), the iterator will be at a key higher than the given hash.
	Seek(common.Hash)
}
// newIterator creates an iterator over the accounts of this single diff layer.
// AccountList is invoked first so that the layer's account list is available;
// the iterator starts before the first element (index -1).
func (dl *diffLayer) newIterator() Iterator {
	dl.AccountList()

	it := &dlIterator{
		layer: dl,
		index: -1,
	}
	return it
}
// dlIterator is an Iterator over the account list of a single diff layer.
// Seek binary-searches that list, so it is expected to be sorted.
type dlIterator struct {
	layer *diffLayer // layer whose accountList is being walked
	index int        // position in layer.accountList; newIterator starts it at -1
}
// Next steps the iterator forward one element, returning false if exhausted.
// Once the end has been reached the index is no longer advanced.
func (it *dlIterator) Next() bool {
	total := len(it.layer.accountList)
	if it.index >= total {
		return false
	}
	it.index++
	return it.index < total
}
// Key returns the hash the iterator is currently positioned at. If the
// iterator is not positioned on an element - either because Next has not been
// called yet (index -1) or because it is exhausted - the empty hash is returned.
func (it *dlIterator) Key() common.Hash {
	// The index starts at -1 (see newIterator and Seek), so both bounds need
	// to be checked to avoid an out-of-range panic before the first Next.
	if it.index < 0 || it.index >= len(it.layer.accountList) {
		return common.Hash{}
	}
	return it.layer.accountList[it.index]
}
// Seek positions the iterator so that the following call to Next lands on the
// first account hash strictly greater than key.
func (it *dlIterator) Seek(key common.Hash) {
	accounts := it.layer.accountList

	// Binary search for the smallest position holding a hash larger than key
	pos := sort.Search(len(accounts), func(i int) bool {
		return bytes.Compare(accounts[i][:], key[:]) > 0
	})
	// Park one slot earlier, Next will step onto the found position
	it.index = pos - 1
}
// binaryIterator merges two iterators: a, created on a diff layer, and b,
// created on that layer's parent (see newBinaryIterator).
type binaryIterator struct {
	a     Iterator    // iterator over the layer itself
	b     Iterator    // iterator over the parent layer(s)
	aDone bool        // set once a.Next() has returned false
	bDone bool        // set once b.Next() has returned false
	k     common.Hash // key most recently selected by Next
}
// newBinaryIterator creates an iterator over this layer and all diff layers
// below it by recursively pairing each layer with the iterator of its parent.
// If the parent is not a diff layer, a plain single-layer iterator is returned.
func (dl *diffLayer) newBinaryIterator() Iterator {
	own := dl.newIterator()

	parent, isDiff := dl.parent.(*diffLayer)
	if !isDiff {
		// The parent is the disk layer, nothing to merge with
		return own
	}
	merged := &binaryIterator{
		a: own,
		b: parent.newBinaryIterator(),
	}
	// Pre-load the first element of both sides
	merged.aDone = !merged.a.Next()
	merged.bDone = !merged.b.Next()
	return merged
}
// Next selects the smaller of the two pending keys, stores it as the current
// key and advances the side it came from. Keys present on both sides are
// emitted only once: the duplicate on side a is skipped. It returns false once
// both sides are exhausted.
func (it *binaryIterator) Next() bool {
	if it.aDone && it.bDone {
		return false
	}
	keyB := it.b.Key()
	for {
		keyA := it.a.Key()
		switch {
		case it.aDone:
			// Only b has elements left
			it.bDone = !it.b.Next()
			it.k = keyB
			return true
		case it.bDone:
			// Only a has elements left
			it.aDone = !it.a.Next()
			it.k = keyA
			return true
		}
		order := bytes.Compare(keyA[:], keyB[:])
		if order < 0 {
			it.aDone = !it.a.Next()
			it.k = keyA
			return true
		}
		if order > 0 {
			it.bDone = !it.b.Next()
			it.k = keyB
			return true
		}
		// Same key on both sides, step a forward and re-evaluate
		it.aDone = !it.a.Next()
	}
}
// Key returns the key selected by the most recent call to Next.
func (it *binaryIterator) Key() common.Hash {
	return it.k
}
// Seek is not implemented for the binary iterator yet and always panics.
func (it *binaryIterator) Seek(key common.Hash) {
	panic("todo: implement")
}
// iterators returns one single-layer iterator for this layer and for every
// diff layer below it. The slice is ordered bottom-up: the deepest diff layer
// comes first and this layer last.
func (dl *diffLayer) iterators() []Iterator {
	parent, isDiff := dl.parent.(*diffLayer)
	if !isDiff {
		return []Iterator{dl.newIterator()}
	}
	return append(parent.iterators(), dl.newIterator())
}
// fastIterator is a more optimized multi-layer iterator which maintains a
// direct mapping of all iterators leading down to the bottom layer. It also
// implements sort.Interface (Len, Less, Swap) over the iterators' current keys.
type fastIterator struct {
	iterators []Iterator // live per-layer iterators, kept ordered by current key
	initiated bool       // false until the first Next after a Seek (Seek already advanced every iterator once)
}
// Len implements sort.Interface, returning the number of active iterators.
func (fi *fastIterator) Len() int {
	return len(fi.iterators)
}
// Less implements sort.Interface, ordering the iterators by their current key.
func (fi *fastIterator) Less(i, j int) bool {
	left, right := fi.iterators[i].Key(), fi.iterators[j].Key()
	return bytes.Compare(right[:], left[:]) > 0
}
// Swap implements sort.Interface, exchanging the iterators at i and j.
func (fi *fastIterator) Swap(i, j int) {
	tmp := fi.iterators[i]
	fi.iterators[i] = fi.iterators[j]
	fi.iterators[j] = tmp
}
// Next implements the Iterator interface. It returns false if no more elements
// can be retrieved (false == exhausted).
func (fi *fastIterator) Next() bool {
	switch {
	case len(fi.iterators) == 0:
		return false

	case fi.initiated:
		return fi.innerNext(0)

	default:
		// First call after a Seek: every iterator was already stepped once in
		// order to sort them, so don't move forward again.
		fi.initiated = true
		return true
	}
}
// innerNext advances the iterator at position pos and then restores the
// ordering of the iterator list. It should be invoked when we know that two
// elements in the list may have the same value.
//
// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke
// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue
// along the list and apply the same operation if needed.
//
// It returns false only if the last remaining iterator was exhausted and
// removed, leaving the list empty.
func (fi *fastIterator) innerNext(pos int) bool {
	if !fi.iterators[pos].Next() {
		// Exhausted, remove this iterator
		fi.remove(pos)
		if len(fi.iterators) == 0 {
			return false
		}
		return true
	}
	if pos == len(fi.iterators)-1 {
		// The advanced iterator is the last one in the list, there is no
		// neighbour after it that it could be out of order with
		return true
	}
	// We next:ed the elem at 'pos'. Now we may have to re-sort that elem
	val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key()
	diff := bytes.Compare(val[:], neighbour[:])
	if diff < 0 {
		// It is still in correct place
		return true
	}
	if diff == 0 {
		// It has same value as the neighbour. So still in correct place, but
		// we need to iterate on the neighbour
		fi.innerNext(pos + 1)
		return true
	}
	// At this point, the elem is in the wrong location, but the
	// remaining list is sorted. Find out where to move the elem
	iterationNeeded := false
	index := sort.Search(len(fi.iterators), func(n int) bool {
		if n <= pos {
			// No need to search 'behind' us
			return false
		}
		if n == len(fi.iterators)-1 {
			// Can always place an elem last
			return true
		}
		// Position n is acceptable if the element following it is larger
		neighbour := fi.iterators[n+1].Key()
		diff := bytes.Compare(val[:], neighbour[:])
		if diff == 0 {
			// The elem we're placing it next to has the same value,
			// so it's going to need further iteration
			iterationNeeded = true
		}
		return diff < 0
	})
	fi.move(pos, index)
	if iterationNeeded {
		// The moved elem now sits next to an equal key, step it forward again
		fi.innerNext(index)
	}
	return true
}
// move relocates the iterator at index to newpos (clamped to the last valid
// position), shifting the iterators in between one slot towards the front.
func (fi *fastIterator) move(index, newpos int) {
	if last := len(fi.iterators) - 1; newpos > last {
		newpos = last
	}
	elem := fi.iterators[index]

	// Slide everything in (index, newpos] one step left, then drop the moved
	// iterator into the freed slot. The tail after newpos stays untouched.
	copy(fi.iterators[index:newpos], fi.iterators[index+1:newpos+1])
	fi.iterators[newpos] = elem
}
// remove drops the iterator at index from the list, preserving the order of
// the remaining ones.
func (fi *fastIterator) remove(index int) {
	shrunk := len(fi.iterators) - 1
	copy(fi.iterators[index:], fi.iterators[index+1:])
	fi.iterators = fi.iterators[:shrunk]
}
// Key returns the current key, which is the key of the first iterator in the
// list. The list is indexed unconditionally, so this panics if no iterators
// remain (i.e. after Next has returned false).
func (fi *fastIterator) Key() common.Hash {
	return fi.iterators[0].Key()
}
// Seek steps every underlying iterator forward so that, after calling Next(),
// the iterator will be at a key higher than the given hash. Iterators that are
// exhausted by the seek are dropped, the remaining ones are positioned on
// distinct keys and re-sorted.
func (fi *fastIterator) Seek(key common.Hash) {
	var (
		// seen tracks the keys some iterator is already positioned on
		seen   = make(map[common.Hash]struct{}, len(fi.iterators))
		length = len(fi.iterators)
	)
	// Exhausted iterators are swapped to the tail and the live region shrunk.
	// The index is only advanced when the current slot was kept: after a swap
	// the slot holds a not-yet-processed iterator, and slots past 'length'
	// hold already discarded ones which must not be visited again.
	for i := 0; i < length; {
		it := fi.iterators[i]
		it.Seek(key)

		exhausted := true
		for it.Next() {
			v := it.Key()
			if _, exist := seen[v]; !exist {
				seen[v] = struct{}{}
				exhausted = false
				break
			}
			// Another iterator already sits on this key, keep stepping
		}
		if exhausted {
			length--
			fi.iterators[i], fi.iterators[length] = fi.iterators[length], fi.iterators[i]
			continue
		}
		i++
	}
	// Now remove those that were placed in the end
	fi.iterators = fi.iterators[:length]

	// The list is now totally unsorted, need to re-sort the entire list
	sort.Sort(fi)
	fi.initiated = false
}
// newFastIterator creates a fast multi-layer iterator on top of this layer. It
// gathers the single-layer iterators of the whole diff stack up front, so it
// does not query parents as much, and seeks to the empty hash to sort them.
func (dl *diffLayer) newFastIterator() Iterator {
	fast := &fastIterator{
		iterators: dl.iterators(),
	}
	fast.Seek(common.Hash{})
	return fast
}
// Debug is a convenience helper during testing, printing the last byte of the
// current key of every active iterator on a single line.
func (fi *fastIterator) Debug() {
	for i := range fi.iterators {
		key := fi.iterators[i].Key()
		fmt.Printf(" %v ", key[31])
	}
	fmt.Println()
}

View File

@ -18,7 +18,6 @@ package snapshot
import ( import (
"bytes" "bytes"
"encoding/binary"
"math/big" "math/big"
"math/rand" "math/rand"
"testing" "testing"
@ -348,365 +347,3 @@ func BenchmarkJournal(b *testing.B) {
layer.Journal(new(bytes.Buffer)) layer.Journal(new(bytes.Buffer))
} }
} }
// TestIteratorBasics tests some simple single-layer iteration: a diff layer
// with 100 random accounts must yield exactly 100 keys.
func TestIteratorBasics(t *testing.T) {
	accounts := make(map[common.Hash][]byte)
	storage := make(map[common.Hash]map[common.Hash][]byte)

	// Fill up a single layer, giving roughly half of the accounts one slot
	for n := 0; n < 100; n++ {
		hash := randomHash()
		accounts[hash] = randomAccount()

		if rand.Intn(20) >= 10 {
			continue
		}
		slot := make([]byte, 32)
		rand.Read(slot)
		storage[hash] = map[common.Hash][]byte{randomHash(): slot}
	}
	layer := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage)
	verifyIterator(t, 100, layer.newIterator())
}
// testIterator is a mock iterator backed by a byte slice. The current element
// is values[0]; Next drops it.
type testIterator struct {
	values []byte
}
// newTestIterator creates a mock iterator positioned on the first given value.
func newTestIterator(values ...byte) *testIterator {
	ti := new(testIterator)
	ti.values = values
	return ti
}
// Next drops the current value and reports whether any value is left.
func (ti *testIterator) Next() bool {
	ti.values = ti.values[1:]
	return len(ti.values) != 0
}
// Key returns the current value converted to a hash via common.BytesToHash
// (the tests read it back through Key()[31]). It indexes values[0]
// unconditionally, so it panics once the iterator is exhausted.
func (ti *testIterator) Key() common.Hash {
	return common.BytesToHash([]byte{ti.values[0]})
}
// Seek is not supported by the mock iterator and always panics.
func (ti *testIterator) Seek(common.Hash) {
	panic("implement me")
}
// TestFastIteratorBasics feeds the fast iterator a set of mock iterators
// (already ordered by their first element) and checks that the merged output
// contains every key exactly once, in ascending order.
func TestFastIteratorBasics(t *testing.T) {
	type testCase struct {
		lists   [][]byte
		expKeys []byte
	}
	for i, tc := range []testCase{
		{lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4},
			{7, 14, 15}, {9, 13, 15, 16}},
			expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}},
		{lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9},
			{9, 10}, {10, 13, 15, 16}},
			expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
	} {
		var iterators []Iterator
		for _, data := range tc.lists {
			iterators = append(iterators, newTestIterator(data...))
		}
		fi := &fastIterator{
			iterators: iterators,
			initiated: false,
		}
		count := 0
		for fi.Next() {
			// Fail cleanly instead of panicking on an out-of-range index if
			// the iterator produces more keys than expected
			if count >= len(tc.expKeys) {
				t.Errorf("tc %d: iterator returned more than the expected %d keys", i, len(tc.expKeys))
				break
			}
			if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got {
				t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp)
			}
			count++
		}
		// A prematurely exhausted iterator must not go unnoticed either
		if exp := len(tc.expKeys); count != exp {
			t.Errorf("tc %d: iterator len wrong, expected %d, got %d", i, exp, count)
		}
	}
}
// verifyIterator drains the given iterator, checking that the keys come out in
// strictly ascending order and that exactly expCount of them are returned.
func verifyIterator(t *testing.T, expCount int, it Iterator) {
	t.Helper()

	var (
		i    = 0
		last = common.Hash{}
	)
	for it.Next() {
		v := it.Key()
		if bytes.Compare(last[:], v[:]) >= 0 {
			t.Errorf("Wrong order:\n%x \n>=\n%x", last, v)
		}
		// Remember the key, otherwise every element would only ever be
		// compared against the empty hash
		last = v
		i++
	}
	if i != expCount {
		t.Errorf("iterator len wrong, expected %d, got %d", expCount, i)
	}
}
// TestIteratorTraversal tests some simple multi-layer iteration over a stack
// of three diff layers with partially overlapping accounts.
func TestIteratorTraversal(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	accountsOf := func(hexes ...string) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for _, hex := range hexes {
			set[common.HexToHash(hex)] = randomAccount()
		}
		return set
	}
	// Entries present in multiple layers should only be output once
	bottom := newDiffLayer(emptyLayer{}, common.Hash{},
		accountsOf("0xaa", "0xee", "0xff", "0xf0"), storage)

	middle := bottom.Update(common.Hash{},
		accountsOf("0xbb", "0xdd", "0xf0"), storage)

	top := middle.Update(common.Hash{},
		accountsOf("0xcc", "0xf0", "0xff"), storage)

	// Single layer iterator only sees the top layer
	verifyIterator(t, 3, top.newIterator())

	// Multi-layered binary iterator sees the 7 distinct accounts
	verifyIterator(t, 7, top.newBinaryIterator())

	// Multi-layered fast iterator sees the 7 distinct accounts
	verifyIterator(t, 7, top.newFastIterator())
}
// TestIteratorLargeTraversal iterates a stack of 100 diff layers which all
// contain the exact same 200 account hashes; every iterator flavour must
// output each of them once.
func TestIteratorLargeTraversal(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	sequential := func(count int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for n := 1; n <= count; n++ {
			var hash common.Hash
			binary.BigEndian.PutUint64(hash[:], uint64(n))
			set[hash] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer{}, common.Hash{}, sequential(200), storage)

	top := bottom.Update(common.Hash{}, sequential(200), storage)
	for depth := 2; depth < 100; depth++ {
		top = top.Update(common.Hash{}, sequential(200), storage)
	}
	// Single layer iterator
	verifyIterator(t, 200, top.newIterator())

	// Multi-layered binary iterator
	verifyIterator(t, 200, top.newBinaryIterator())

	// Multi-layered fast iterator
	verifyIterator(t, 200, top.newFastIterator())
}
// BenchmarkIteratorTraversal is a bit notorious -- all 100 layers contain the
// exact same 200 accounts. That means that the iterators need to process
// 20000 items (100 layers x 200 accounts), but only spit out 200 values
// eventually.
//
//BenchmarkIteratorTraversal/binary_iterator-6         	    2008	    573290 ns/op	    9520 B/op	     199 allocs/op
//BenchmarkIteratorTraversal/fast_iterator-6           	    1946	    575596 ns/op	   20146 B/op	     134 allocs/op
func BenchmarkIteratorTraversal(b *testing.B) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	sequential := func(count int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for n := 1; n <= count; n++ {
			var hash common.Hash
			binary.BigEndian.PutUint64(hash[:], uint64(n))
			set[hash] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer{}, common.Hash{}, sequential(200), storage)

	child := bottom.Update(common.Hash{}, sequential(200), storage)
	for depth := 2; depth < 100; depth++ {
		child = child.Update(common.Hash{}, sequential(200), storage)
	}
	// Construct an iterator once up front, so that the creation of the sorted
	// account lists is not included in the measurements.
	child.newBinaryIterator()

	benchmarks := []struct {
		name  string
		newIt func() Iterator
	}{
		{"binary iterator", child.newBinaryIterator},
		{"fast iterator", child.newFastIterator},
	}
	for _, bench := range benchmarks {
		newIt := bench.newIt
		b.Run(bench.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				got := 0
				for it := newIt(); it.Next(); {
					got++
				}
				if exp := 200; got != exp {
					b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
				}
			}
		})
	}
}
// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where
// the base layer (2000 accounts) is a lot larger than the layers stacked on
// top of it (20 accounts each).
//
// This is heavy on the binary iterator, which in most cases will have to
// call recursively 100 times for the majority of the values.
//
// BenchmarkIteratorLargeBaselayer/binary_iterator-6    	     585	   2067377 ns/op	    9520 B/op	     199 allocs/op
// BenchmarkIteratorLargeBaselayer/fast_iterator-6      	   13198	     91043 ns/op	    8601 B/op	     118 allocs/op
func BenchmarkIteratorLargeBaselayer(b *testing.B) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	sequential := func(count int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for n := 1; n <= count; n++ {
			var hash common.Hash
			binary.BigEndian.PutUint64(hash[:], uint64(n))
			set[hash] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer{}, common.Hash{}, sequential(2000), storage)

	child := bottom.Update(common.Hash{}, sequential(20), storage)
	for depth := 2; depth < 100; depth++ {
		child = child.Update(common.Hash{}, sequential(20), storage)
	}
	// Construct an iterator once up front, so that the creation of the sorted
	// account lists is not included in the measurements.
	child.newBinaryIterator()

	benchmarks := []struct {
		name  string
		newIt func() Iterator
	}{
		{"binary iterator", child.newBinaryIterator},
		{"fast iterator", child.newFastIterator},
	}
	for _, bench := range benchmarks {
		newIt := bench.newIt
		b.Run(bench.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				got := 0
				for it := newIt(); it.Next(); {
					got++
				}
				if exp := 2000; got != exp {
					b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
				}
			}
		})
	}
}
// TestIteratorFlattning tests what happens when we
// - have a live iterator on child C (parent C1 -> C2 .. CN)
// - flatten C's parent into the layer below it
// - continue iterating
// Right now, this "works" simply because the keys do not change -- the
// iterator is not aware that a layer has become stale. This naive
// solution probably won't work in the long run, however.
func TestIteratorFlattning(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	accountsOf := func(hexes ...string) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for _, hex := range hexes {
			set[common.HexToHash(hex)] = randomAccount()
		}
		return set
	}
	// Entries present in multiple layers should only be output once
	bottom := newDiffLayer(emptyLayer{}, common.Hash{},
		accountsOf("0xaa", "0xee", "0xff", "0xf0"), storage)

	middle := bottom.Update(common.Hash{},
		accountsOf("0xbb", "0xdd", "0xf0"), storage)

	top := middle.Update(common.Hash{},
		accountsOf("0xcc", "0xf0", "0xff"), storage)

	// Create the iterator first, then flatten the layer underneath it
	it := top.newFastIterator()
	top.parent.(*diffLayer).flatten()

	// The parent should now be stale, iteration must still find all 7 keys
	verifyIterator(t, 7, it)
}
// TestIteratorSeek checks that seeking positions both the single-layer and
// the fast multi-layer iterator strictly after the requested hash.
func TestIteratorSeek(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	accountsOf := func(hexes ...string) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for _, hex := range hexes {
			set[common.HexToHash(hex)] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer{}, common.Hash{},
		accountsOf("0xaa", "0xee", "0xff", "0xf0"), storage)

	// Single layer seeks, each on a fresh iterator
	for _, tc := range []struct {
		seek string
		left int
	}{
		{"0xdd", 3}, // expected: ee, f0, ff
		{"0xaa", 3}, // expected: ee, f0, ff
		{"0xff", 0}, // expected: nothing
	} {
		single := bottom.newIterator()
		single.Seek(common.HexToHash(tc.seek))
		verifyIterator(t, tc.left, single)
	}
	top := bottom.Update(common.Hash{},
		accountsOf("0xbb", "0xdd", "0xf0"), storage)
	top = top.Update(common.Hash{},
		accountsOf("0xcc", "0xf0", "0xff"), storage)

	// expected: cc, dd, ee, f0, ff
	fast := top.newFastIterator()
	fast.Seek(common.HexToHash("0xbb"))
	verifyIterator(t, 5, fast)

	// expected: f0, ff
	fast = top.newFastIterator()
	fast.Seek(common.HexToHash("0xef"))
	verifyIterator(t, 2, fast)

	// expected: ff
	fast = top.newFastIterator()
	fast.Seek(common.HexToHash("0xf0"))
	verifyIterator(t, 1, fast)

	// Seeking the already drained iterator to the last key yields nothing
	fast.Seek(common.HexToHash("0xff"))
	verifyIterator(t, 0, fast)
}

View File

@ -0,0 +1,116 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"bytes"
"sort"
"github.com/ethereum/go-ethereum/common"
)
// AccountIterator is an iterator to step over all the accounts in a snapshot,
// which may or may not be composed of multiple layers.
type AccountIterator interface {
	// Seek steps the iterator forward as many elements as needed, so that after
	// calling Next(), the iterator will be at a key higher than the given hash.
	Seek(hash common.Hash)

	// Next steps the iterator forward one element, returning false if exhausted
	// or if iteration failed for some reason (e.g. root being iterated becomes
	// stale and garbage collected). Use Error to tell the two cases apart.
	Next() bool

	// Error returns any failure that occurred during iteration, which might have
	// caused a premature iteration exit (e.g. snapshot stack becoming stale).
	Error() error

	// Key returns the hash of the account the iterator is currently at.
	Key() common.Hash

	// Value returns the RLP encoded slim account the iterator is currently at.
	// The method has no error return; implementations that can fail (e.g. when
	// the snapshot stack becomes stale) report the failure through Error.
	Value() []byte
}
// diffAccountIterator is an account iterator that steps over the accounts (both
// live and deleted) contained within a single diff layer.
type diffAccountIterator struct {
	layer *diffLayer // layer whose accountList is being walked
	index int        // position in layer.accountList; newAccountIterator starts it at -1
}
// newAccountIterator creates an account iterator over this single diff layer.
// AccountList is invoked first so that the layer's account list is available;
// the iterator starts before the first element (index -1).
func (dl *diffLayer) newAccountIterator() *diffAccountIterator {
	dl.AccountList()

	it := new(diffAccountIterator)
	it.layer = dl
	it.index = -1
	return it
}
// Seek positions the iterator so that the following call to Next lands on the
// first account hash strictly greater than key.
func (it *diffAccountIterator) Seek(key common.Hash) {
	accounts := it.layer.accountList

	// Binary search for the smallest position holding a hash larger than key
	pos := sort.Search(len(accounts), func(i int) bool {
		return bytes.Compare(accounts[i][:], key[:]) > 0
	})
	// Park one slot earlier, Next will step onto the found position
	it.index = pos - 1
}
// Next steps the iterator forward one element, returning false if exhausted.
// Once the end has been reached the index is no longer advanced.
func (it *diffAccountIterator) Next() bool {
	total := len(it.layer.accountList)
	if it.index >= total {
		return false
	}
	it.index++
	return it.index < total
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
//
// A diff layer is immutable content-wise after creation and can always be
// fully iterated without error, so this method always returns nil.
func (it *diffAccountIterator) Error() error {
	return nil
}
// Key returns the hash of the account the iterator is currently at. If the
// iterator is not positioned on an element - either because Next has not been
// called yet (index -1) or because it is exhausted - the empty hash is returned.
func (it *diffAccountIterator) Key() common.Hash {
	// The index starts at -1 (see newAccountIterator and Seek), so both bounds
	// need to be checked to avoid an out-of-range panic before the first Next.
	if it.index < 0 || it.index >= len(it.layer.accountList) {
		return common.Hash{}
	}
	return it.layer.accountList[it.index]
}
// Value returns the RLP encoded slim account the iterator is currently at. The
// layer's read lock is held for the lookup. It panics if the account list
// references a hash that has no entry in the layer's account data.
func (it *diffAccountIterator) Value() []byte {
	it.layer.lock.RLock()
	defer it.layer.lock.RUnlock()

	data, ok := it.layer.accountData[it.layer.accountList[it.index]]
	if !ok {
		panic("iterator references non-existent layer account")
	}
	return data
}
// iterators returns one single-layer account iterator for this layer and for
// every diff layer below it. The slice is ordered bottom-up: the deepest diff
// layer comes first and this layer last.
func (dl *diffLayer) iterators() []AccountIterator {
	parent, isDiff := dl.parent.(*diffLayer)
	if !isDiff {
		return []AccountIterator{dl.newAccountIterator()}
	}
	return append(parent.iterators(), dl.newAccountIterator())
}

View File

@ -0,0 +1,115 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"bytes"
"github.com/ethereum/go-ethereum/common"
)
// binaryAccountIterator is a simplistic iterator to step over the accounts in
// a snapshot, which may or may not be composed of multiple layers. Performance
// wise this iterator is slow, it's meant for cross validating the fast one.
type binaryAccountIterator struct {
	a     *diffAccountIterator // iterator over the layer itself
	b     AccountIterator      // iterator over the parent layer(s)
	aDone bool                 // set once a.Next() has returned false
	bDone bool                 // set once b.Next() has returned false
	k     common.Hash          // key most recently selected by Next
	fail  error                // failure recorded by Value, reported by Error
}
// newBinaryAccountIterator creates a simplistic account iterator to step over
// all the accounts in a slow, but easily verifiable way. Each diff layer is
// recursively paired with the iterator of its parent; if the parent is not a
// diff layer, a plain single-layer iterator is returned.
func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
	own := dl.newAccountIterator()

	parent, isDiff := dl.parent.(*diffLayer)
	if !isDiff {
		// The parent is the disk layer, nothing to merge with
		return own
	}
	merged := &binaryAccountIterator{
		a: own,
		b: parent.newBinaryAccountIterator(),
	}
	// Pre-load the first element of both sides
	merged.aDone = !merged.a.Next()
	merged.bDone = !merged.b.Next()
	return merged
}
// Seek is meant to step the iterator forward as many elements as needed, so
// that after calling Next(), the iterator will be at a key higher than the
// given hash. It is not implemented for the binary iterator yet and always
// panics.
func (it *binaryAccountIterator) Seek(key common.Hash) {
	panic("todo: implement")
}
// Next steps the iterator forward one element, returning false if exhausted.
// The smaller of the two pending keys is stored as the current key and the
// side it came from is advanced. Keys present on both sides are emitted only
// once: the duplicate on side a is skipped.
func (it *binaryAccountIterator) Next() bool {
	if it.aDone && it.bDone {
		return false
	}
	keyB := it.b.Key()
	for {
		keyA := it.a.Key()
		switch {
		case it.aDone:
			// Only b has elements left
			it.bDone = !it.b.Next()
			it.k = keyB
			return true
		case it.bDone:
			// Only a has elements left
			it.aDone = !it.a.Next()
			it.k = keyA
			return true
		}
		order := bytes.Compare(keyA[:], keyB[:])
		if order < 0 {
			it.aDone = !it.a.Next()
			it.k = keyA
			return true
		}
		if order > 0 {
			it.bDone = !it.b.Next()
			it.k = keyB
			return true
		}
		// Same key on both sides, step a forward and re-evaluate
		it.aDone = !it.a.Next()
	}
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale). In
// the visible code the failure is only ever recorded by Value.
func (it *binaryAccountIterator) Error() error {
	return it.fail
}
// Key returns the hash of the account the iterator is currently at, i.e. the
// key selected by the most recent call to Next.
func (it *binaryAccountIterator) Key() common.Hash {
	return it.k
}
// Value returns the RLP encoded slim account the iterator is currently at,
// resolved through the top layer's AccountRLP. If that lookup fails, the error
// is recorded (retrievable via Error) and nil is returned.
func (it *binaryAccountIterator) Value() []byte {
	blob, err := it.a.layer.AccountRLP(it.k)
	if err == nil {
		return blob
	}
	it.fail = err
	return nil
}

View File

@ -0,0 +1,211 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"bytes"
"fmt"
"sort"
"github.com/ethereum/go-ethereum/common"
)
// fastAccountIterator is a more optimized multi-layer iterator which maintains a
// direct mapping of all iterators leading down to the bottom layer. It also
// implements sort.Interface (Len, Less, Swap) over the iterators' current keys.
type fastAccountIterator struct {
	iterators []AccountIterator // live per-layer iterators, kept ordered by current key
	initiated bool              // false until the first Next after a Seek (Seek already advanced every iterator once)
	fail      error             // returned by Error; not assigned anywhere in the visible code
}
// newFastAccountIterator creates a fast multi-layer account iterator on top of
// this layer. It gathers the single-layer iterators of the whole diff stack up
// front, so it does not query parents as much, and seeks to the empty hash to
// sort them.
func (dl *diffLayer) newFastAccountIterator() AccountIterator {
	fast := &fastAccountIterator{
		iterators: dl.iterators(),
	}
	fast.Seek(common.Hash{})
	return fast
}
// Len implements sort.Interface, returning the number of active iterators.
func (fi *fastAccountIterator) Len() int {
	return len(fi.iterators)
}
// Less implements sort.Interface, ordering the iterators by their current key.
func (fi *fastAccountIterator) Less(i, j int) bool {
	left, right := fi.iterators[i].Key(), fi.iterators[j].Key()
	return bytes.Compare(right[:], left[:]) > 0
}
// Swap implements sort.Interface, exchanging the iterators at i and j.
func (fi *fastAccountIterator) Swap(i, j int) {
	tmp := fi.iterators[i]
	fi.iterators[i] = fi.iterators[j]
	fi.iterators[j] = tmp
}
// Seek steps every underlying iterator forward so that, after calling Next(),
// the iterator will be at a key higher than the given hash. Iterators that are
// exhausted by the seek are dropped, the remaining ones are positioned on
// distinct keys and re-sorted.
func (fi *fastAccountIterator) Seek(key common.Hash) {
	var (
		// seen tracks the keys some iterator is already positioned on
		seen   = make(map[common.Hash]struct{}, len(fi.iterators))
		length = len(fi.iterators)
	)
	// Exhausted iterators are swapped to the tail and the live region shrunk.
	// The index is only advanced when the current slot was kept: after a swap
	// the slot holds a not-yet-processed iterator, and slots past 'length'
	// hold already discarded ones which must not be visited again.
	for i := 0; i < length; {
		it := fi.iterators[i]
		it.Seek(key)

		exhausted := true
		for it.Next() {
			v := it.Key()
			if _, exist := seen[v]; !exist {
				seen[v] = struct{}{}
				exhausted = false
				break
			}
			// Another iterator already sits on this key, keep stepping
		}
		if exhausted {
			length--
			fi.iterators[i], fi.iterators[length] = fi.iterators[length], fi.iterators[i]
			continue
		}
		i++
	}
	// Now remove those that were placed in the end
	fi.iterators = fi.iterators[:length]

	// The list is now totally unsorted, need to re-sort the entire list
	sort.Sort(fi)
	fi.initiated = false
}
// Next implements the AccountIterator interface. It returns false if no more
// elements can be retrieved (false == exhausted).
func (fi *fastAccountIterator) Next() bool {
	switch {
	case len(fi.iterators) == 0:
		return false

	case fi.initiated:
		return fi.innerNext(0)

	default:
		// First call after a Seek: every iterator was already stepped once in
		// order to sort them, so don't move forward again.
		fi.initiated = true
		return true
	}
}
// innerNext advances the iterator at position pos and then restores the
// ordering of the iterator list. It should be invoked when we know that two
// elements in the list may have the same value.
//
// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke
// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue
// along the list and apply the same operation if needed.
//
// It returns false only if the last remaining iterator was exhausted and
// removed, leaving the list empty.
func (fi *fastAccountIterator) innerNext(pos int) bool {
	if !fi.iterators[pos].Next() {
		// Exhausted, remove this iterator
		fi.remove(pos)
		if len(fi.iterators) == 0 {
			return false
		}
		return true
	}
	if pos == len(fi.iterators)-1 {
		// The advanced iterator is the last one in the list, there is no
		// neighbour after it that it could be out of order with
		return true
	}
	// We next:ed the elem at 'pos'. Now we may have to re-sort that elem
	val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key()
	diff := bytes.Compare(val[:], neighbour[:])
	if diff < 0 {
		// It is still in correct place
		return true
	}
	if diff == 0 {
		// It has same value as the neighbour. So still in correct place, but
		// we need to iterate on the neighbour
		fi.innerNext(pos + 1)
		return true
	}
	// At this point, the elem is in the wrong location, but the
	// remaining list is sorted. Find out where to move the elem
	iterationNeeded := false
	index := sort.Search(len(fi.iterators), func(n int) bool {
		if n <= pos {
			// No need to search 'behind' us
			return false
		}
		if n == len(fi.iterators)-1 {
			// Can always place an elem last
			return true
		}
		// Position n is acceptable if the element following it is larger
		neighbour := fi.iterators[n+1].Key()
		diff := bytes.Compare(val[:], neighbour[:])
		if diff == 0 {
			// The elem we're placing it next to has the same value,
			// so it's going to need further iteration
			iterationNeeded = true
		}
		return diff < 0
	})
	fi.move(pos, index)
	if iterationNeeded {
		// The moved elem now sits next to an equal key, step it forward again
		fi.innerNext(index)
	}
	return true
}
// move relocates the iterator at index to newpos (clamped to the last valid
// position), shifting the iterators in between one slot towards the front.
func (fi *fastAccountIterator) move(index, newpos int) {
	if last := len(fi.iterators) - 1; newpos > last {
		newpos = last
	}
	elem := fi.iterators[index]

	// Slide everything in (index, newpos] one step left, then drop the moved
	// iterator into the freed slot. The tail after newpos stays untouched.
	copy(fi.iterators[index:newpos], fi.iterators[index+1:newpos+1])
	fi.iterators[newpos] = elem
}
// remove drops the iterator at index from the list, preserving the order of
// the remaining ones.
func (fi *fastAccountIterator) remove(index int) {
	shrunk := len(fi.iterators) - 1
	copy(fi.iterators[index:], fi.iterators[index+1:])
	fi.iterators = fi.iterators[:shrunk]
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
// NOTE(review): nothing in the visible code assigns fi.fail, so this currently
// appears to always return nil - confirm.
func (fi *fastAccountIterator) Error() error {
	return fi.fail
}
// Key returns the current key, which is the key of the first iterator in the
// list. The list is indexed unconditionally, so this panics if no iterators
// remain (i.e. after Next has returned false).
func (fi *fastAccountIterator) Key() common.Hash {
	return fi.iterators[0].Key()
}
// Value is meant to return the account data the iterator is currently at. It
// is not implemented yet and always panics.
func (fi *fastAccountIterator) Value() []byte {
	panic("todo")
}
// Debug is a convenience helper during testing, printing the last byte of the
// current key of every active iterator on a single line.
func (fi *fastAccountIterator) Debug() {
	for i := range fi.iterators {
		key := fi.iterators[i].Key()
		fmt.Printf(" %v ", key[31])
	}
	fmt.Println()
}

View File

@ -0,0 +1,396 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"bytes"
"encoding/binary"
"math/rand"
"testing"
"github.com/ethereum/go-ethereum/common"
)
// TestIteratorBasics tests some simple single-layer iteration: a diff layer
// with 100 random accounts must yield exactly 100 keys.
func TestIteratorBasics(t *testing.T) {
	accounts := make(map[common.Hash][]byte)
	storage := make(map[common.Hash]map[common.Hash][]byte)

	// Fill up a single layer, giving roughly half of the accounts one slot
	for n := 0; n < 100; n++ {
		hash := randomHash()
		accounts[hash] = randomAccount()

		if rand.Intn(20) >= 10 {
			continue
		}
		slot := make([]byte, 32)
		rand.Read(slot)
		storage[hash] = map[common.Hash][]byte{randomHash(): slot}
	}
	layer := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage)
	verifyIterator(t, 100, layer.newAccountIterator())
}
// testIterator is a minimal iterator stub used by the tests. It walks a list
// of single-byte keys; the element at the front of values is the current one.
type testIterator struct {
	values []byte
}
// newTestIterator creates a test iterator over the given byte keys. The
// iterator starts out positioned on the first value, i.e. Key can be called
// without a preceding Next.
func newTestIterator(values ...byte) *testIterator {
	return &testIterator{values: values}
}
// Seek is not supported by the test iterator and always panics.
func (ti *testIterator) Seek(_ common.Hash) {
	panic("implement me")
}
// Next drops the current (front) value and reports whether another value is
// available afterwards. Calling Next on an empty or already exhausted iterator
// returns false instead of panicking with a slice bounds error.
func (ti *testIterator) Next() bool {
	if len(ti.values) == 0 {
		return false
	}
	ti.values = ti.values[1:]
	return len(ti.values) > 0
}
// Error is not supported by the test iterator and always panics.
func (ti *testIterator) Error() error {
	panic("implement me")
}
// Key converts the current (front) value into a hash via common.BytesToHash.
// It panics if the iterator holds no more values.
func (ti *testIterator) Key() common.Hash {
	current := ti.values[0]
	return common.BytesToHash([]byte{current})
}
// Value is not supported by the test iterator and always panics.
func (ti *testIterator) Value() []byte {
	panic("implement me")
}
// TestFastIteratorBasics feeds hand-written test iterators directly into a
// fastAccountIterator and checks that the keys come out in the expected order,
// with each key reported exactly once. Besides comparing every key, the test
// also verifies the number of keys produced, so an iterator that stops early
// or yields extra entries is reported instead of passing silently or crashing
// with an index out of range.
func TestFastIteratorBasics(t *testing.T) {
	type testCase struct {
		lists   [][]byte
		expKeys []byte
	}
	for i, tc := range []testCase{
		{lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4},
			{7, 14, 15}, {9, 13, 15, 16}},
			expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}},
		{lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9},
			{9, 10}, {10, 13, 15, 16}},
			expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
	} {
		iterators := make([]AccountIterator, 0, len(tc.lists))
		for _, data := range tc.lists {
			iterators = append(iterators, newTestIterator(data...))
		}
		fi := &fastAccountIterator{
			iterators: iterators,
			initiated: false,
		}
		count := 0
		for fi.Next() {
			// Guard the index below: a surplus key is a test failure, not a panic
			if count >= len(tc.expKeys) {
				t.Errorf("tc %d: iterator yielded more than the %d expected keys", i, len(tc.expKeys))
				break
			}
			if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got {
				t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp)
			}
			count++
		}
		if count != len(tc.expKeys) {
			t.Errorf("tc %d: iterator len wrong, expected %d, got %d", i, len(tc.expKeys), count)
		}
	}
}
// verifyIterator drains the given iterator and checks that
//   - the keys are produced in strictly increasing order, and
//   - exactly expCount keys are produced.
//
// Failures are reported via t.Errorf, attributed to the calling test.
func verifyIterator(t *testing.T, expCount int, it AccountIterator) {
	t.Helper()

	var (
		count = 0
		last  = common.Hash{}
	)
	for it.Next() {
		key := it.Key()
		if bytes.Compare(last[:], key[:]) >= 0 {
			t.Errorf("Wrong order:\n%x \n>=\n%x", last, key)
		}
		// Remember the key so the next one is compared against it; without
		// this the check would only ever compare against the zero hash.
		last = key
		count++
	}
	if count != expCount {
		t.Errorf("iterator len wrong, expected %d, got %d", expCount, count)
	}
}
// TestIteratorTraversal tests some simple multi-layer iteration. Three stacked
// diff layers with partially overlapping accounts are created; the topmost
// layer alone holds 3 accounts, whereas the whole stack holds 7 distinct ones.
func TestIteratorTraversal(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	// accountsFor builds an account set with random content for the given keys
	accountsFor := func(hexKeys ...string) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for _, hexKey := range hexKeys {
			set[common.HexToHash(hexKey)] = randomAccount()
		}
		return set
	}
	// Entries present in multiple layers should only be output once
	bottom := newDiffLayer(emptyLayer(), common.Hash{}, accountsFor("0xaa", "0xee", "0xff", "0xf0"), storage)
	top := bottom.Update(common.Hash{}, accountsFor("0xbb", "0xdd", "0xf0"), storage)
	top = top.Update(common.Hash{}, accountsFor("0xcc", "0xf0", "0xff"), storage)

	// Single layer iterator
	verifyIterator(t, 3, top.newAccountIterator())

	// Multi-layered binary iterator
	verifyIterator(t, 7, top.newBinaryAccountIterator())

	// Multi-layered fast iterator
	verifyIterator(t, 7, top.newFastAccountIterator())
}
// TestIteratorLargeTraversal stacks 100 diff layers which all contain the
// exact same 200 account keys, and checks that every iterator flavour outputs
// each of those keys exactly once.
func TestIteratorLargeTraversal(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	// sequentialAccounts creates num accounts with random content, keyed by
	// the numbers 1..num encoded big-endian into the first 8 bytes of the hash
	sequentialAccounts := func(num int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for id := 1; id <= num; id++ {
			var key common.Hash
			binary.BigEndian.PutUint64(key[:], uint64(id))
			set[key] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer(), common.Hash{}, sequentialAccounts(200), storage)
	top := bottom.Update(common.Hash{}, sequentialAccounts(200), storage)
	for layer := 2; layer < 100; layer++ {
		top = top.Update(common.Hash{}, sequentialAccounts(200), storage)
	}
	// Single layer iterator
	verifyIterator(t, 200, top.newAccountIterator())

	// Multi-layered binary iterator
	verifyIterator(t, 200, top.newBinaryAccountIterator())

	// Multi-layered fast iterator
	verifyIterator(t, 200, top.newFastAccountIterator())
}
// BenchmarkIteratorTraversal is a bit notorious -- all layers contain the
// exact same 200 accounts. With 100 stacked layers that means the iterators
// need to process 100 x 200 items, but only spit out 200 values eventually.
//
//BenchmarkIteratorTraversal/binary_iterator-6         	    2008	    573290 ns/op	    9520 B/op	     199 allocs/op
//BenchmarkIteratorTraversal/fast_iterator-6           	    1946	    575596 ns/op	   20146 B/op	     134 allocs/op
func BenchmarkIteratorTraversal(b *testing.B) {
	const want = 200

	storage := make(map[common.Hash]map[common.Hash][]byte)

	// sequentialAccounts creates num accounts with random content, keyed by
	// the numbers 1..num encoded big-endian into the first 8 bytes of the hash
	sequentialAccounts := func(num int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for id := 1; id <= num; id++ {
			var key common.Hash
			binary.BigEndian.PutUint64(key[:], uint64(id))
			set[key] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer(), common.Hash{}, sequentialAccounts(200), storage)
	top := bottom.Update(common.Hash{}, sequentialAccounts(200), storage)
	for layer := 2; layer < 100; layer++ {
		top = top.Update(common.Hash{}, sequentialAccounts(200), storage)
	}
	// We call this once before the benchmark, so the creation of
	// sorted accountlists are not included in the results.
	top.newBinaryAccountIterator()

	b.Run("binary iterator", func(b *testing.B) {
		for run := 0; run < b.N; run++ {
			count := 0
			for it := top.newBinaryAccountIterator(); it.Next(); {
				count++
			}
			if count != want {
				b.Errorf("iterator len wrong, expected %d, got %d", want, count)
			}
		}
	})
	b.Run("fast iterator", func(b *testing.B) {
		for run := 0; run < b.N; run++ {
			count := 0
			for it := top.newFastAccountIterator(); it.Next(); {
				count++
			}
			if count != want {
				b.Errorf("iterator len wrong, expected %d, got %d", want, count)
			}
		}
	})
}
// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where
// the baselayer is a lot larger than the upper layers: the bottom layer holds
// 2000 accounts while each of the 99 layers on top of it holds only 20.
//
// This is heavy on the binary iterator, which in most cases will have to
// call recursively 100 times for the majority of the values
//
// BenchmarkIteratorLargeBaselayer/binary_iterator-6    	     585	   2067377 ns/op	    9520 B/op	     199 allocs/op
// BenchmarkIteratorLargeBaselayer/fast_iterator-6      	   13198	     91043 ns/op	    8601 B/op	     118 allocs/op
func BenchmarkIteratorLargeBaselayer(b *testing.B) {
	const want = 2000

	storage := make(map[common.Hash]map[common.Hash][]byte)

	// sequentialAccounts creates num accounts with random content, keyed by
	// the numbers 1..num encoded big-endian into the first 8 bytes of the hash
	sequentialAccounts := func(num int) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for id := 1; id <= num; id++ {
			var key common.Hash
			binary.BigEndian.PutUint64(key[:], uint64(id))
			set[key] = randomAccount()
		}
		return set
	}
	bottom := newDiffLayer(emptyLayer(), common.Hash{}, sequentialAccounts(2000), storage)
	top := bottom.Update(common.Hash{}, sequentialAccounts(20), storage)
	for layer := 2; layer < 100; layer++ {
		top = top.Update(common.Hash{}, sequentialAccounts(20), storage)
	}
	// We call this once before the benchmark, so the creation of
	// sorted accountlists are not included in the results.
	top.newBinaryAccountIterator()

	b.Run("binary iterator", func(b *testing.B) {
		for run := 0; run < b.N; run++ {
			count := 0
			for it := top.newBinaryAccountIterator(); it.Next(); {
				count++
			}
			if count != want {
				b.Errorf("iterator len wrong, expected %d, got %d", want, count)
			}
		}
	})
	b.Run("fast iterator", func(b *testing.B) {
		for run := 0; run < b.N; run++ {
			count := 0
			for it := top.newFastAccountIterator(); it.Next(); {
				count++
			}
			if count != want {
				b.Errorf("iterator len wrong, expected %d, got %d", want, count)
			}
		}
	})
}
// TestIteratorFlattning tests what happens when we
// - have a live iterator on child C (parent C1 -> C2 .. CN)
// - flatten C2 all the way into CN
// - continue iterating
// Right now, this "works" simply because the keys do not change -- the
// iterator is not aware that a layer has become stale. This naive
// solution probably won't work in the long run, however
func TestIteratorFlattning(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	// accountsFor builds an account set with random content for the given keys
	accountsFor := func(hexKeys ...string) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for _, hexKey := range hexKeys {
			set[common.HexToHash(hexKey)] = randomAccount()
		}
		return set
	}
	// Entries present in multiple layers should only be output once
	bottom := newDiffLayer(emptyLayer(), common.Hash{}, accountsFor("0xaa", "0xee", "0xff", "0xf0"), storage)
	top := bottom.Update(common.Hash{}, accountsFor("0xbb", "0xdd", "0xf0"), storage)
	top = top.Update(common.Hash{}, accountsFor("0xcc", "0xf0", "0xff"), storage)

	// Create the iterator first, then flatten the layer below the top one
	it := top.newFastAccountIterator()
	middle := top.parent.(*diffLayer)
	middle.flatten()

	// The parent should now be stale
	verifyIterator(t, 7, it)
}
// TestIteratorSeek tests that seeking positions an iterator so that only the
// keys strictly larger than the seek target are output afterwards, both for
// the single-layer iterator and for the multi-layered fast iterator.
func TestIteratorSeek(t *testing.T) {
	storage := make(map[common.Hash]map[common.Hash][]byte)

	// accountsFor builds an account set with random content for the given keys
	accountsFor := func(hexKeys ...string) map[common.Hash][]byte {
		set := make(map[common.Hash][]byte)
		for _, hexKey := range hexKeys {
			set[common.HexToHash(hexKey)] = randomAccount()
		}
		return set
	}
	// seekAndVerify seeks the iterator to target and checks that exactly
	// want keys are output afterwards
	seekAndVerify := func(it AccountIterator, target string, want int) {
		it.Seek(common.HexToHash(target))
		verifyIterator(t, want, it)
	}
	parent := newDiffLayer(emptyLayer(), common.Hash{}, accountsFor("0xaa", "0xee", "0xff", "0xf0"), storage)

	// Single layer: ee, f0, ff remain
	seekAndVerify(parent.newAccountIterator(), "0xdd", 3)

	// Single layer: ee, f0, ff remain
	seekAndVerify(parent.newAccountIterator(), "0xaa", 3)

	// Single layer: nothing remains
	seekAndVerify(parent.newAccountIterator(), "0xff", 0)

	child := parent.Update(common.Hash{}, accountsFor("0xbb", "0xdd", "0xf0"), storage)
	child = child.Update(common.Hash{}, accountsFor("0xcc", "0xf0", "0xff"), storage)

	// Multi layer: cc, dd, ee, f0, ff remain
	seekAndVerify(child.newFastAccountIterator(), "0xbb", 5)

	// Multi layer: f0, ff remain
	seekAndVerify(child.newFastAccountIterator(), "0xef", 2)

	// Multi layer: the same iterator is seeked twice in a row; only ff remains
	// after the first seek and nothing after the second
	var reused AccountIterator = child.newFastAccountIterator()
	seekAndVerify(reused, "0xf0", 1)
	seekAndVerify(reused, "0xff", 0)
}