forked from cerc-io/plugeth

core/state/snapshot: implement iterator priority for fast direct data lookup

parent e567675473
commit e570835356
@@ -24,20 +24,27 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 )
 
+type weightedIterator struct {
+	it       AccountIterator
+	priority int
+}
+
 // fastAccountIterator is a more optimized multi-layer iterator which maintains a
 // direct mapping of all iterators leading down to the bottom layer
 type fastAccountIterator struct {
-	iterators []AccountIterator
+	iterators []*weightedIterator
 	initiated bool
 	fail      error
 }
 
 // The fast iterator does not query parents as much.
 // newFastAccountIterator creates a new fastAccountIterator
 func (dl *diffLayer) newFastAccountIterator() AccountIterator {
 	f := &fastAccountIterator{
-		iterators: dl.iterators(),
 		initiated: false,
 	}
+	for i, it := range dl.iterators() {
+		f.iterators = append(f.iterators, &weightedIterator{it, -i})
+	}
 	f.Seek(common.Hash{})
 	return f
 }
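In isolation, the wrapping that newFastAccountIterator now performs looks like the sketch below. It is a simplified model with invented types (layerIterator, the layer names and their ordering are assumptions, not the snapshot package's API); the one thing it takes from the diff verbatim is the &weightedIterator{it, -i} pattern, which tags each layer's iterator with a priority that later tie-breaks equal keys.

package main

import "fmt"

// layerIterator is a hypothetical stand-in for AccountIterator, just enough to
// show how the constructor assigns priorities; it is not the geth type.
type layerIterator struct{ name string }

// weightedIterator mirrors the struct added in the diff: an iterator plus a
// priority used to break ties between layers exposing the same key.
type weightedIterator struct {
	it       *layerIterator
	priority int
}

func main() {
	// Assume these are ordered the same way dl.iterators() returns them; the
	// exact ordering is defined elsewhere in the snapshot package.
	layers := []*layerIterator{{"layer-0"}, {"layer-1"}, {"layer-2"}}

	var iterators []*weightedIterator
	for i, it := range layers {
		// Same pattern as newFastAccountIterator: priority is the negated
		// position, so "smaller priority value wins" yields a stable order.
		iterators = append(iterators, &weightedIterator{it, -i})
	}
	for _, w := range iterators {
		fmt.Printf("%s -> priority %d\n", w.it.name, w.priority)
	}
}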
@@ -49,9 +56,17 @@ func (fi *fastAccountIterator) Len() int {
 
 // Less implements sort.Interface
 func (fi *fastAccountIterator) Less(i, j int) bool {
-	a := fi.iterators[i].Key()
-	b := fi.iterators[j].Key()
-	return bytes.Compare(a[:], b[:]) < 0
+	a := fi.iterators[i].it.Key()
+	b := fi.iterators[j].it.Key()
+	bDiff := bytes.Compare(a[:], b[:])
+	if bDiff < 0 {
+		return true
+	}
+	if bDiff > 0 {
+		return false
+	}
+	// keys are equal, sort by iterator priority
+	return fi.iterators[i].priority < fi.iterators[j].priority
 }
 
 // Swap implements sort.Interface
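The three-way comparison added to Less is the heart of the change: position 0 must always hold both the smallest key and, among duplicates of that key, the preferred layer. A self-contained sketch of the same ordering rule, using toy types rather than the real iterators:

package main

import (
	"bytes"
	"fmt"
	"sort"
)

// entry is a toy model of a weightedIterator positioned on its current key;
// only the key bytes and the priority matter for the ordering shown here.
type entry struct {
	key      []byte
	priority int
}

func main() {
	entries := []entry{
		{[]byte{0x07}, 0},
		{[]byte{0x05}, 0},
		{[]byte{0x05}, -1}, // same key as above, but a smaller priority value
	}
	// The comparison mirrors the new Less: order by key first, and only when
	// the keys are identical fall back to the iterator priority.
	sort.Slice(entries, func(i, j int) bool {
		if c := bytes.Compare(entries[i].key, entries[j].key); c != 0 {
			return c < 0
		}
		return entries[i].priority < entries[j].priority
	})
	for _, e := range entries {
		fmt.Printf("key=%x priority=%d\n", e.key, e.priority)
	}
	// Output order: 05/-1, 05/0, 07/0 -- the smaller priority value ends up
	// first among entries that share a key.
}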
@@ -61,23 +76,42 @@ func (fi *fastAccountIterator) Swap(i, j int) {
 
 func (fi *fastAccountIterator) Seek(key common.Hash) {
 	// We need to apply this across all iterators
-	var seen = make(map[common.Hash]struct{})
+	var seen = make(map[common.Hash]int)
 
 	length := len(fi.iterators)
-	for i, it := range fi.iterators {
-		it.Seek(key)
+	for i := 0; i < len(fi.iterators); i++ {
+		//for i, it := range fi.iterators {
+		it := fi.iterators[i]
+		it.it.Seek(key)
 		for {
-			if !it.Next() {
+			if !it.it.Next() {
 				// To be removed
 				// swap it to the last position for now
 				fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i]
 				length--
 				break
 			}
-			v := it.Key()
-			if _, exist := seen[v]; !exist {
-				seen[v] = struct{}{}
+			v := it.it.Key()
+			if other, exist := seen[v]; !exist {
+				seen[v] = i
 				break
+			} else {
+				// This whole else-block can be avoided, if we instead
+				// do an inital priority-sort of the iterators. If we do that,
+				// then we'll only wind up here if a lower-priority (preferred) iterator
+				// has the same value, and then we will always just continue.
+				// However, it costs an extra sort, so it's probably not better
+
+				// One needs to be progressed, use priority to determine which
+				if fi.iterators[other].priority < it.priority {
+					// the 'it' should be progressed
+					continue
+				} else {
+					// the 'other' should be progressed - swap them
+					it = fi.iterators[other]
+					fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other]
+					continue
+				}
+			}
 		}
 	}
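The tricky part of the new Seek is deduplication: after seeking, several layers may come to rest on the same key, and only the one with the smallest priority value may keep it, while the others have to be advanced past it. The following toy replay of that rule (invented indices and priorities, plain byte keys instead of common.Hash) shows who yields to whom:

package main

import "fmt"

func main() {
	// Three hypothetical iterators, identified by index, all land on the same
	// key after seeking. seen maps a key to the index currently claiming it,
	// like the new map[common.Hash]int in the diff; priorities stands in for
	// the per-iterator priority field.
	priorities := []int{0, -1, -2}
	seen := make(map[byte]int)
	const key = byte(0x05)

	for i := range priorities {
		other, exists := seen[key]
		if !exists {
			seen[key] = i
			continue
		}
		// Same rule as the diff: the claimant with the smaller priority value
		// keeps the key; the other iterator would have to be progressed past it.
		if priorities[other] < priorities[i] {
			fmt.Printf("iterator %d must advance (loses to %d)\n", i, other)
		} else {
			fmt.Printf("iterator %d must advance (loses to %d)\n", other, i)
			seen[key] = i
		}
	}
	fmt.Println("key", key, "is finally owned by iterator", seen[key])
}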
@@ -110,7 +144,7 @@ func (fi *fastAccountIterator) Next() bool {
 // innerNext(3), which will call Next on elem 3 (the second '5'). It will continue
 // along the list and apply the same operation if needed
 func (fi *fastAccountIterator) innerNext(pos int) bool {
-	if !fi.iterators[pos].Next() {
+	if !fi.iterators[pos].it.Next() {
 		//Exhausted, remove this iterator
 		fi.remove(pos)
 		if len(fi.iterators) == 0 {
@@ -123,23 +157,23 @@ func (fi *fastAccountIterator) innerNext(pos int) bool {
 		return true
 	}
 	// We next:ed the elem at 'pos'. Now we may have to re-sort that elem
-	val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key()
-	diff := bytes.Compare(val[:], neighbour[:])
-	if diff < 0 {
+	var (
+		current, neighbour = fi.iterators[pos], fi.iterators[pos+1]
+		val, neighbourVal  = current.it.Key(), neighbour.it.Key()
+	)
+	if diff := bytes.Compare(val[:], neighbourVal[:]); diff < 0 {
 		// It is still in correct place
 		return true
-	}
-	if diff == 0 {
-		// It has same value as the neighbour. So still in correct place, but
-		// we need to iterate on the neighbour
+	} else if diff == 0 && current.priority < neighbour.priority {
+		// So still in correct place, but we need to iterate on the neighbour
 		fi.innerNext(pos + 1)
 		return true
 	}
 	// At this point, the elem is in the wrong location, but the
 	// remaining list is sorted. Find out where to move the elem
-	iterationNeeded := false
+	iteratee := -1
 	index := sort.Search(len(fi.iterators), func(n int) bool {
-		if n <= pos {
+		if n < pos {
 			// No need to search 'behind' us
 			return false
 		}
@@ -147,18 +181,29 @@ func (fi *fastAccountIterator) innerNext(pos int) bool {
 			// Can always place an elem last
 			return true
 		}
-		neighbour := fi.iterators[n+1].Key()
-		diff := bytes.Compare(val[:], neighbour[:])
-		if diff == 0 {
-			// The elem we're placing it next to has the same value,
-			// so it's going to need further iteration
-			iterationNeeded = true
+		neighbour := fi.iterators[n+1].it.Key()
+		if diff := bytes.Compare(val[:], neighbour[:]); diff < 0 {
+			return true
+		} else if diff > 0 {
+			return false
 		}
-		return diff < 0
+		// The elem we're placing it next to has the same value,
+		// so whichever winds up on n+1 will need further iteraton
+		iteratee = n + 1
+		if current.priority < fi.iterators[n+1].priority {
+			// We can drop the iterator here
+			return true
+		}
+		// We need to move it one step further
+		return false
+		// TODO benchmark which is best, this works too:
+		//iteratee = n
+		//return true
+		// Doing so should finish the current search earlier
 	})
 	fi.move(pos, index)
-	if iterationNeeded {
-		fi.innerNext(index)
+	if iteratee != -1 {
+		fi.innerNext(iteratee)
 	}
 	return true
 }
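After advancing one iterator, innerNext has to put it back into its sorted slot, and it uses sort.Search over the still-sorted tail to find that slot. A minimal, self-contained sketch of the same search shape (plain bytes instead of iterators; the priority tie-break and the iteratee bookkeeping from the diff are noted but not reproduced):

package main

import (
	"fmt"
	"sort"
)

func main() {
	// keys models the current keys of fi.iterators: the element at pos was
	// just advanced and may be out of place, but everything after it is still
	// sorted -- the precondition innerNext relies on.
	keys := []byte{9, 2, 4, 6, 8}
	pos := 0
	val := keys[pos]

	// Same shape as the sort.Search callback above: never look behind pos, an
	// element can always be placed last, and otherwise compare against the key
	// at n+1. (The real code additionally breaks ties via iterator priority and
	// remembers n+1 in 'iteratee' for a follow-up innerNext call.)
	index := sort.Search(len(keys), func(n int) bool {
		if n < pos {
			return false
		}
		if n == len(keys)-1 {
			return true
		}
		return val < keys[n+1]
	})
	fmt.Printf("element %d at pos %d belongs at index %d\n", val, pos, index)
}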
@@ -171,7 +216,7 @@ func (fi *fastAccountIterator) move(index, newpos int) {
 	var (
 		elem   = fi.iterators[index]
 		middle = fi.iterators[index+1 : newpos+1]
-		suffix []AccountIterator
+		suffix []*weightedIterator
 	)
 	if newpos < len(fi.iterators)-1 {
 		suffix = fi.iterators[newpos+1:]
@@ -194,18 +239,18 @@ func (fi *fastAccountIterator) Error() error {
 
 // Key returns the current key
 func (fi *fastAccountIterator) Key() common.Hash {
-	return fi.iterators[0].Key()
+	return fi.iterators[0].it.Key()
 }
 
 // Value returns the current key
 func (fi *fastAccountIterator) Value() []byte {
-	panic("todo")
+	return fi.iterators[0].it.Value()
 }
 
 // Debug is a convencience helper during testing
 func (fi *fastAccountIterator) Debug() {
 	for _, it := range fi.iterators {
-		fmt.Printf(" %v ", it.Key()[31])
+		fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Key()[0])
 	}
 	fmt.Println()
 }
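Taken together, the changes in this file turn the fast iterator into a priority-aware k-way merge: keys come out in ascending order, each key exactly once, and when several layers carry the same key, the layer whose wrapped iterator has the smallest priority value supplies it (which is what lets Value() now read straight from iterators[0]). The sketch below is a deliberately naive, slice-based model of that end result (invented source type, integer keys, re-sorting on every step instead of the incremental innerNext repair), useful only for checking one's mental model against the diff:

package main

import (
	"fmt"
	"sort"
)

// source is a hypothetical miniature of one layer's sorted account list: a
// sorted key slice plus the priority of the layer it came from. It is a model
// for reasoning about the diff above, not the snapshot package's API.
type source struct {
	keys     []int
	pos      int
	priority int
}

// mergeByPriority emits every key exactly once, in ascending order, letting
// the source with the smallest priority value win whenever several layers
// contain the same key -- the overall behaviour the fast iterator implements
// incrementally with Seek/Next/innerNext.
func mergeByPriority(sources []*source) []string {
	var out []string
	for {
		var live []*source
		for _, s := range sources {
			if s.pos < len(s.keys) {
				live = append(live, s)
			}
		}
		if len(live) == 0 {
			return out
		}
		// Order by (key, priority), the same ordering Less implements.
		sort.Slice(live, func(i, j int) bool {
			ki, kj := live[i].keys[live[i].pos], live[j].keys[live[j].pos]
			if ki != kj {
				return ki < kj
			}
			return live[i].priority < live[j].priority
		})
		winner := live[0]
		key := winner.keys[winner.pos]
		out = append(out, fmt.Sprintf("key %d from priority %d", key, winner.priority))
		// Advance every source past the emitted key so duplicates are skipped.
		for _, s := range live {
			for s.pos < len(s.keys) && s.keys[s.pos] == key {
				s.pos++
			}
		}
	}
}

func main() {
	sources := []*source{
		{keys: []int{1, 5, 9}, priority: 0},
		{keys: []int{5, 7}, priority: -1},
	}
	for _, line := range mergeByPriority(sources) {
		fmt.Println(line)
	}
	// key 1 from priority 0
	// key 5 from priority -1
	// key 7 from priority -1
	// key 9 from priority 0
}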
@@ -19,6 +19,7 @@ package snapshot
 
 import (
+	"bytes"
 	"encoding/binary"
 	"fmt"
 	"math/rand"
 	"testing"
 
@@ -95,9 +96,10 @@ func TestFastIteratorBasics(t *testing.T) {
 			{9, 10}, {10, 13, 15, 16}},
 			expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
 	} {
-		var iterators []AccountIterator
-		for _, data := range tc.lists {
-			iterators = append(iterators, newTestIterator(data...))
+		var iterators []*weightedIterator
+		for i, data := range tc.lists {
+			it := newTestIterator(data...)
+			iterators = append(iterators, &weightedIterator{it, i})
 
 		}
 		fi := &fastAccountIterator{
@@ -162,6 +164,69 @@ func TestIteratorTraversal(t *testing.T) {
 	verifyIterator(t, 7, child.newFastAccountIterator())
 }
 
+// TestIteratorTraversalValues tests some multi-layer iteration, where we
+// also expect the correct values to show up
+func TestIteratorTraversalValues(t *testing.T) {
+	var (
+		storage = make(map[common.Hash]map[common.Hash][]byte)
+		a       = make(map[common.Hash][]byte)
+		b       = make(map[common.Hash][]byte)
+		c       = make(map[common.Hash][]byte)
+		d       = make(map[common.Hash][]byte)
+		e       = make(map[common.Hash][]byte)
+		f       = make(map[common.Hash][]byte)
+		g       = make(map[common.Hash][]byte)
+		h       = make(map[common.Hash][]byte)
+	)
+	// entries in multiple layers should only become output once
+	for i := byte(2); i < 0xff; i++ {
+		a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i))
+		if i > 20 && i%2 == 0 {
+			b[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 1, i))
+		}
+		if i%4 == 0 {
+			c[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 2, i))
+		}
+		if i%7 == 0 {
+			d[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 3, i))
+		}
+		if i%8 == 0 {
+			e[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 4, i))
+		}
+		if i > 50 || i < 85 {
+			f[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 5, i))
+		}
+		if i%64 == 0 {
+			g[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 6, i))
+		}
+		if i%128 == 0 {
+			h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i))
+		}
+	}
+	child := newDiffLayer(emptyLayer(), common.Hash{}, a, storage).
+		Update(common.Hash{}, b, storage).
+		Update(common.Hash{}, c, storage).
+		Update(common.Hash{}, d, storage).
+		Update(common.Hash{}, e, storage).
+		Update(common.Hash{}, f, storage).
+		Update(common.Hash{}, g, storage).
+		Update(common.Hash{}, h, storage)
+
+	it := child.newFastAccountIterator()
+	for it.Next() {
+		key := it.Key()
+		exp, err := child.accountRLP(key, 0)
+		if err != nil {
+			t.Fatal(err)
+		}
+		got := it.Value()
+		if !bytes.Equal(exp, got) {
+			t.Fatalf("Error on key %x, got %v exp %v", key, string(got), string(exp))
+		}
+		//fmt.Printf("val: %v\n", string(it.Value()))
+	}
+}
+
 func TestIteratorLargeTraversal(t *testing.T) {
 	// This testcase is a bit notorious -- all layers contain the exact
 	// same 200 accounts.
@@ -195,8 +260,14 @@ func TestIteratorLargeTraversal(t *testing.T) {
 // same 200 accounts. That means that we need to process 2000 items, but only
 // spit out 200 values eventually.
 //
-//BenchmarkIteratorTraversal/binary_iterator-6        2008    573290 ns/op    9520 B/op    199 allocs/op
-//BenchmarkIteratorTraversal/fast_iterator-6          1946    575596 ns/op   20146 B/op    134 allocs/op
+// The value-fetching benchmark is easy on the binary iterator, since it never has to reach
+// down at any depth for retrieving the values -- all are on the toppmost layer
+//
+// BenchmarkIteratorTraversal/binary_iterator_keys-6      2239    483674 ns/op
+// BenchmarkIteratorTraversal/binary_iterator_values-6    2403    501810 ns/op
+// BenchmarkIteratorTraversal/fast_iterator_keys-6        1923    677966 ns/op
+// BenchmarkIteratorTraversal/fast_iterator_values-6      1741    649967 ns/op
+//
 func BenchmarkIteratorTraversal(b *testing.B) {
 
 	var storage = make(map[common.Hash]map[common.Hash][]byte)
@@ -224,7 +295,7 @@ func BenchmarkIteratorTraversal(b *testing.B) {
 	// We call this once before the benchmark, so the creation of
 	// sorted accountlists are not included in the results.
 	child.newBinaryAccountIterator()
-	b.Run("binary iterator", func(b *testing.B) {
+	b.Run("binary iterator keys", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			got := 0
 			it := child.newBinaryAccountIterator()
@@ -236,7 +307,20 @@ func BenchmarkIteratorTraversal(b *testing.B) {
 			}
 		}
 	})
-	b.Run("fast iterator", func(b *testing.B) {
+	b.Run("binary iterator values", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newBinaryAccountIterator()
+			for it.Next() {
+				got++
+				child.accountRLP(it.Key(), 0)
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator keys", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			got := 0
 			it := child.newFastAccountIterator()
@@ -248,6 +332,19 @@ func BenchmarkIteratorTraversal(b *testing.B) {
 			}
 		}
 	})
+	b.Run("fast iterator values", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newFastAccountIterator()
+			for it.Next() {
+				got++
+				it.Value()
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
 }
 
 // BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where
@@ -256,8 +353,10 @@ func BenchmarkIteratorTraversal(b *testing.B) {
 // This is heavy on the binary iterator, which in most cases will have to
 // call recursively 100 times for the majority of the values
 //
-// BenchmarkIteratorLargeBaselayer/binary_iterator-6    585   2067377 ns/op   9520 B/op   199 allocs/op
-// BenchmarkIteratorLargeBaselayer/fast_iterator-6    13198     91043 ns/op   8601 B/op   118 allocs/op
+// BenchmarkIteratorLargeBaselayer/binary_iterator_(keys)-6      514    1971999 ns/op
+// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6      10000     114385 ns/op
+// BenchmarkIteratorLargeBaselayer/binary_iterator_(values)-6     61   18997492 ns/op
+// BenchmarkIteratorLargeBaselayer/fast_iterator_(values)-6     4047     296823 ns/op
 func BenchmarkIteratorLargeBaselayer(b *testing.B) {
 	var storage = make(map[common.Hash]map[common.Hash][]byte)
 
@@ -285,7 +384,7 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) {
 	// We call this once before the benchmark, so the creation of
 	// sorted accountlists are not included in the results.
 	child.newBinaryAccountIterator()
-	b.Run("binary iterator", func(b *testing.B) {
+	b.Run("binary iterator (keys)", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			got := 0
 			it := child.newBinaryAccountIterator()
@@ -297,7 +396,7 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) {
 			}
 		}
 	})
-	b.Run("fast iterator", func(b *testing.B) {
+	b.Run("fast iterator (keys)", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			got := 0
 			it := child.newFastAccountIterator()
@@ -309,6 +408,34 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) {
 			}
 		}
 	})
+	b.Run("binary iterator (values)", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newBinaryAccountIterator()
+			for it.Next() {
+				got++
+				v := it.Key()
+				child.accountRLP(v, -0)
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+
+	b.Run("fast iterator (values)", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := child.newFastAccountIterator()
+			for it.Next() {
+				it.Value()
+				got++
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
 }
 
 // TestIteratorFlatting tests what happens when we
@@ -394,3 +521,34 @@ func TestIteratorSeek(t *testing.T) {
 	it.Seek(common.HexToHash("0xff"))
 	verifyIterator(t, 0, it)
 }
+
+//BenchmarkIteratorSeek/init+seek-6    4328    245477 ns/op
+func BenchmarkIteratorSeek(b *testing.B) {
+
+	var storage = make(map[common.Hash]map[common.Hash][]byte)
+	mkAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+	layer := newDiffLayer(emptyLayer(), common.Hash{}, mkAccounts(200), storage)
+	for i := 1; i < 100; i++ {
+		layer = layer.Update(common.Hash{},
+			mkAccounts(200), storage)
+	}
+	b.Run("init+seek", func(b *testing.B) {
+		b.ResetTimer()
+		seekpos := make([]byte, 20)
+		for i := 0; i < b.N; i++ {
+			b.StopTimer()
+			rand.Read(seekpos)
+			it := layer.newFastAccountIterator()
+			b.StartTimer()
+			it.Seek(common.BytesToHash(seekpos))
+		}
+	})
+}