core/state/snapshot: implement iterator priority for fast direct data lookup

This commit is contained in:
Martin Holst Swende 2019-12-06 23:27:18 +01:00 committed by Péter Szilágyi
parent e567675473
commit e570835356
No known key found for this signature in database
GPG Key ID: E9AE538CEDF8293D
2 changed files with 250 additions and 47 deletions

View File

@ -24,20 +24,27 @@ import (
"github.com/ethereum/go-ethereum/common"
)
type weightedIterator struct {
it AccountIterator
priority int
}
// fastAccountIterator is a more optimized multi-layer iterator which maintains a
// direct mapping of all iterators leading down to the bottom layer
type fastAccountIterator struct {
iterators []AccountIterator
iterators []*weightedIterator
initiated bool
fail error
}
// The fast iterator does not query parents as much.
// newFastAccountIterator creates a new fastAccountIterator
func (dl *diffLayer) newFastAccountIterator() AccountIterator {
f := &fastAccountIterator{
iterators: dl.iterators(),
initiated: false,
}
for i, it := range dl.iterators() {
f.iterators = append(f.iterators, &weightedIterator{it, -i})
}
f.Seek(common.Hash{})
return f
}
@ -49,9 +56,17 @@ func (fi *fastAccountIterator) Len() int {
// Less implements sort.Interface
func (fi *fastAccountIterator) Less(i, j int) bool {
a := fi.iterators[i].Key()
b := fi.iterators[j].Key()
return bytes.Compare(a[:], b[:]) < 0
a := fi.iterators[i].it.Key()
b := fi.iterators[j].it.Key()
bDiff := bytes.Compare(a[:], b[:])
if bDiff < 0 {
return true
}
if bDiff > 0 {
return false
}
// keys are equal, sort by iterator priority
return fi.iterators[i].priority < fi.iterators[j].priority
}
// Swap implements sort.Interface
@ -61,23 +76,42 @@ func (fi *fastAccountIterator) Swap(i, j int) {
func (fi *fastAccountIterator) Seek(key common.Hash) {
// We need to apply this across all iterators
var seen = make(map[common.Hash]struct{})
var seen = make(map[common.Hash]int)
length := len(fi.iterators)
for i, it := range fi.iterators {
it.Seek(key)
for i := 0; i < len(fi.iterators); i++ {
//for i, it := range fi.iterators {
it := fi.iterators[i]
it.it.Seek(key)
for {
if !it.Next() {
if !it.it.Next() {
// To be removed
// swap it to the last position for now
fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i]
length--
break
}
v := it.Key()
if _, exist := seen[v]; !exist {
seen[v] = struct{}{}
v := it.it.Key()
if other, exist := seen[v]; !exist {
seen[v] = i
break
} else {
// This whole else-block can be avoided, if we instead
// do an inital priority-sort of the iterators. If we do that,
// then we'll only wind up here if a lower-priority (preferred) iterator
// has the same value, and then we will always just continue.
// However, it costs an extra sort, so it's probably not better
// One needs to be progressed, use priority to determine which
if fi.iterators[other].priority < it.priority {
// the 'it' should be progressed
continue
} else {
// the 'other' should be progressed - swap them
it = fi.iterators[other]
fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other]
continue
}
}
}
}
@ -110,7 +144,7 @@ func (fi *fastAccountIterator) Next() bool {
// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue
// along the list and apply the same operation if needed
func (fi *fastAccountIterator) innerNext(pos int) bool {
if !fi.iterators[pos].Next() {
if !fi.iterators[pos].it.Next() {
//Exhausted, remove this iterator
fi.remove(pos)
if len(fi.iterators) == 0 {
@ -123,23 +157,23 @@ func (fi *fastAccountIterator) innerNext(pos int) bool {
return true
}
// We next:ed the elem at 'pos'. Now we may have to re-sort that elem
val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key()
diff := bytes.Compare(val[:], neighbour[:])
if diff < 0 {
var (
current, neighbour = fi.iterators[pos], fi.iterators[pos+1]
val, neighbourVal = current.it.Key(), neighbour.it.Key()
)
if diff := bytes.Compare(val[:], neighbourVal[:]); diff < 0 {
// It is still in correct place
return true
}
if diff == 0 {
// It has same value as the neighbour. So still in correct place, but
// we need to iterate on the neighbour
} else if diff == 0 && current.priority < neighbour.priority {
// So still in correct place, but we need to iterate on the neighbour
fi.innerNext(pos + 1)
return true
}
// At this point, the elem is in the wrong location, but the
// remaining list is sorted. Find out where to move the elem
iterationNeeded := false
iteratee := -1
index := sort.Search(len(fi.iterators), func(n int) bool {
if n <= pos {
if n < pos {
// No need to search 'behind' us
return false
}
@ -147,18 +181,29 @@ func (fi *fastAccountIterator) innerNext(pos int) bool {
// Can always place an elem last
return true
}
neighbour := fi.iterators[n+1].Key()
diff := bytes.Compare(val[:], neighbour[:])
if diff == 0 {
// The elem we're placing it next to has the same value,
// so it's going to need further iteration
iterationNeeded = true
neighbour := fi.iterators[n+1].it.Key()
if diff := bytes.Compare(val[:], neighbour[:]); diff < 0 {
return true
} else if diff > 0 {
return false
}
return diff < 0
// The elem we're placing it next to has the same value,
// so whichever winds up on n+1 will need further iteraton
iteratee = n + 1
if current.priority < fi.iterators[n+1].priority {
// We can drop the iterator here
return true
}
// We need to move it one step further
return false
// TODO benchmark which is best, this works too:
//iteratee = n
//return true
// Doing so should finish the current search earlier
})
fi.move(pos, index)
if iterationNeeded {
fi.innerNext(index)
if iteratee != -1 {
fi.innerNext(iteratee)
}
return true
}
@ -171,7 +216,7 @@ func (fi *fastAccountIterator) move(index, newpos int) {
var (
elem = fi.iterators[index]
middle = fi.iterators[index+1 : newpos+1]
suffix []AccountIterator
suffix []*weightedIterator
)
if newpos < len(fi.iterators)-1 {
suffix = fi.iterators[newpos+1:]
@ -194,18 +239,18 @@ func (fi *fastAccountIterator) Error() error {
// Key returns the current key
func (fi *fastAccountIterator) Key() common.Hash {
return fi.iterators[0].Key()
return fi.iterators[0].it.Key()
}
// Value returns the current key
func (fi *fastAccountIterator) Value() []byte {
panic("todo")
return fi.iterators[0].it.Value()
}
// Debug is a convencience helper during testing
func (fi *fastAccountIterator) Debug() {
for _, it := range fi.iterators {
fmt.Printf(" %v ", it.Key()[31])
fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Key()[0])
}
fmt.Println()
}

View File

@ -19,6 +19,7 @@ package snapshot
import (
"bytes"
"encoding/binary"
"fmt"
"math/rand"
"testing"
@ -95,9 +96,10 @@ func TestFastIteratorBasics(t *testing.T) {
{9, 10}, {10, 13, 15, 16}},
expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
} {
var iterators []AccountIterator
for _, data := range tc.lists {
iterators = append(iterators, newTestIterator(data...))
var iterators []*weightedIterator
for i, data := range tc.lists {
it := newTestIterator(data...)
iterators = append(iterators, &weightedIterator{it, i})
}
fi := &fastAccountIterator{
@ -162,6 +164,69 @@ func TestIteratorTraversal(t *testing.T) {
verifyIterator(t, 7, child.newFastAccountIterator())
}
// TestIteratorTraversalValues tests some multi-layer iteration, where we
// also expect the correct values to show up
func TestIteratorTraversalValues(t *testing.T) {
var (
storage = make(map[common.Hash]map[common.Hash][]byte)
a = make(map[common.Hash][]byte)
b = make(map[common.Hash][]byte)
c = make(map[common.Hash][]byte)
d = make(map[common.Hash][]byte)
e = make(map[common.Hash][]byte)
f = make(map[common.Hash][]byte)
g = make(map[common.Hash][]byte)
h = make(map[common.Hash][]byte)
)
// entries in multiple layers should only become output once
for i := byte(2); i < 0xff; i++ {
a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i))
if i > 20 && i%2 == 0 {
b[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 1, i))
}
if i%4 == 0 {
c[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 2, i))
}
if i%7 == 0 {
d[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 3, i))
}
if i%8 == 0 {
e[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 4, i))
}
if i > 50 || i < 85 {
f[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 5, i))
}
if i%64 == 0 {
g[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 6, i))
}
if i%128 == 0 {
h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i))
}
}
child := newDiffLayer(emptyLayer(), common.Hash{}, a, storage).
Update(common.Hash{}, b, storage).
Update(common.Hash{}, c, storage).
Update(common.Hash{}, d, storage).
Update(common.Hash{}, e, storage).
Update(common.Hash{}, f, storage).
Update(common.Hash{}, g, storage).
Update(common.Hash{}, h, storage)
it := child.newFastAccountIterator()
for it.Next() {
key := it.Key()
exp, err := child.accountRLP(key, 0)
if err != nil {
t.Fatal(err)
}
got := it.Value()
if !bytes.Equal(exp, got) {
t.Fatalf("Error on key %x, got %v exp %v", key, string(got), string(exp))
}
//fmt.Printf("val: %v\n", string(it.Value()))
}
}
func TestIteratorLargeTraversal(t *testing.T) {
// This testcase is a bit notorious -- all layers contain the exact
// same 200 accounts.
@ -195,8 +260,14 @@ func TestIteratorLargeTraversal(t *testing.T) {
// same 200 accounts. That means that we need to process 2000 items, but only
// spit out 200 values eventually.
//
//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op
//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op
// The value-fetching benchmark is easy on the binary iterator, since it never has to reach
// down at any depth for retrieving the values -- all are on the toppmost layer
//
// BenchmarkIteratorTraversal/binary_iterator_keys-6 2239 483674 ns/op
// BenchmarkIteratorTraversal/binary_iterator_values-6 2403 501810 ns/op
// BenchmarkIteratorTraversal/fast_iterator_keys-6 1923 677966 ns/op
// BenchmarkIteratorTraversal/fast_iterator_values-6 1741 649967 ns/op
//
func BenchmarkIteratorTraversal(b *testing.B) {
var storage = make(map[common.Hash]map[common.Hash][]byte)
@ -224,7 +295,7 @@ func BenchmarkIteratorTraversal(b *testing.B) {
// We call this once before the benchmark, so the creation of
// sorted accountlists are not included in the results.
child.newBinaryAccountIterator()
b.Run("binary iterator", func(b *testing.B) {
b.Run("binary iterator keys", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newBinaryAccountIterator()
@ -236,7 +307,20 @@ func BenchmarkIteratorTraversal(b *testing.B) {
}
}
})
b.Run("fast iterator", func(b *testing.B) {
b.Run("binary iterator values", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newBinaryAccountIterator()
for it.Next() {
got++
child.accountRLP(it.Key(), 0)
}
if exp := 200; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
b.Run("fast iterator keys", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newFastAccountIterator()
@ -248,6 +332,19 @@ func BenchmarkIteratorTraversal(b *testing.B) {
}
}
})
b.Run("fast iterator values", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newFastAccountIterator()
for it.Next() {
got++
it.Value()
}
if exp := 200; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
}
// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where
@ -256,8 +353,10 @@ func BenchmarkIteratorTraversal(b *testing.B) {
// This is heavy on the binary iterator, which in most cases will have to
// call recursively 100 times for the majority of the values
//
// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op
// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op
// BenchmarkIteratorLargeBaselayer/binary_iterator_(keys)-6 514 1971999 ns/op
// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op
// BenchmarkIteratorLargeBaselayer/binary_iterator_(values)-6 61 18997492 ns/op
// BenchmarkIteratorLargeBaselayer/fast_iterator_(values)-6 4047 296823 ns/op
func BenchmarkIteratorLargeBaselayer(b *testing.B) {
var storage = make(map[common.Hash]map[common.Hash][]byte)
@ -285,7 +384,7 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) {
// We call this once before the benchmark, so the creation of
// sorted accountlists are not included in the results.
child.newBinaryAccountIterator()
b.Run("binary iterator", func(b *testing.B) {
b.Run("binary iterator (keys)", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newBinaryAccountIterator()
@ -297,7 +396,7 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) {
}
}
})
b.Run("fast iterator", func(b *testing.B) {
b.Run("fast iterator (keys)", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newFastAccountIterator()
@ -309,6 +408,34 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) {
}
}
})
b.Run("binary iterator (values)", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newBinaryAccountIterator()
for it.Next() {
got++
v := it.Key()
child.accountRLP(v, -0)
}
if exp := 2000; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
b.Run("fast iterator (values)", func(b *testing.B) {
for i := 0; i < b.N; i++ {
got := 0
it := child.newFastAccountIterator()
for it.Next() {
it.Value()
got++
}
if exp := 2000; got != exp {
b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
}
}
})
}
// TestIteratorFlatting tests what happens when we
@ -394,3 +521,34 @@ func TestIteratorSeek(t *testing.T) {
it.Seek(common.HexToHash("0xff"))
verifyIterator(t, 0, it)
}
//BenchmarkIteratorSeek/init+seek-6 4328 245477 ns/op
func BenchmarkIteratorSeek(b *testing.B) {
var storage = make(map[common.Hash]map[common.Hash][]byte)
mkAccounts := func(num int) map[common.Hash][]byte {
accounts := make(map[common.Hash][]byte)
for i := 0; i < num; i++ {
h := common.Hash{}
binary.BigEndian.PutUint64(h[:], uint64(i+1))
accounts[h] = randomAccount()
}
return accounts
}
layer := newDiffLayer(emptyLayer(), common.Hash{}, mkAccounts(200), storage)
for i := 1; i < 100; i++ {
layer = layer.Update(common.Hash{},
mkAccounts(200), storage)
}
b.Run("init+seek", func(b *testing.B) {
b.ResetTimer()
seekpos := make([]byte, 20)
for i := 0; i < b.N; i++ {
b.StopTimer()
rand.Read(seekpos)
it := layer.newFastAccountIterator()
b.StartTimer()
it.Seek(common.BytesToHash(seekpos))
}
})
}