Add WriteStateSnapshot #15

Merged
roysc merged 23 commits from with-iterator-tracker into main 2023-09-28 03:35:47 +00:00
5 changed files with 62 additions and 65 deletions
Showing only changes of commit 64d45bdfd8 - Show all commits

View File

@ -148,9 +148,9 @@ func (sdb *builder) WriteStateDiff(
func(subdiv uint) {
g.Go(func() error {
a, b := subitersA[subdiv], subitersB[subdiv]
it, aux := utils.NewSymmetricDifferenceIterator(a, b)
it := utils.NewSymmetricDifferenceIterator(a, b)
return sdb.processAccounts(ctx,
it, aux,
it, &it.SymmDiffState,
params.watchedAddressesLeafPaths,
nodeSink, ipldSink, logger,
)
@ -165,7 +165,7 @@ func (sdb *builder) WriteStateDiffTracked(
args Args, params Params,
nodeSink sdtypes.StateNodeSink,
ipldSink sdtypes.IPLDSink,
tracker tracker.Tracker,
tracker tracker.IteratorTracker,
) error {
defer metrics.UpdateDuration(time.Now(), metrics.IndexerMetrics.WriteStateDiffTimer)
// Load tries for old and new states
@ -178,18 +178,14 @@ func (sdb *builder) WriteStateDiffTracked(
return fmt.Errorf("error opening new state trie: %w", err)
}
var subiters []trie.NodeIterator
var auxes []*utils.SymmDiffAux
var subiters, bases []trie.NodeIterator
// Constructor for difference iterator at a specific (recovered) path
makeIterator := func(key []byte) trie.NodeIterator {
a := triea.NodeIterator(key)
b := trieb.NodeIterator(key)
diffit, aux := utils.NewSymmetricDifferenceIterator(a, b)
// iterators are constructed in-order, so these will align
auxes = append(auxes, aux)
return diffit
return utils.NewSymmetricDifferenceIterator(a, b)
}
subiters, err = tracker.Restore(makeIterator)
subiters, bases, err = tracker.Restore(makeIterator)
if err != nil {
return fmt.Errorf("error restoring iterators: %w", err)
}
@ -214,7 +210,7 @@ func (sdb *builder) WriteStateDiffTracked(
func(subdiv uint) {
g.Go(func() error {
return sdb.processAccounts(ctx,
subiters[subdiv], auxes[subdiv],
subiters[subdiv], &bases[subdiv].(*utils.SymmDiffIterator).SymmDiffState,
params.watchedAddressesLeafPaths,
nodeSink, ipldSink, logger,
)
@ -225,9 +221,10 @@ func (sdb *builder) WriteStateDiffTracked(
}
// processAccounts processes account creations, deletions, and updates
// The NodeIterator and SymmDiffState arguments should refer to the same underlying iterator; symdiff is only used to read the symmetric-difference state (FromA, CommonPath, Count).
func (sdb *builder) processAccounts(
ctx context.Context,
it trie.NodeIterator, aux *utils.SymmDiffAux,
it trie.NodeIterator, symdiff *utils.SymmDiffState,
watchedAddressesLeafPaths [][]byte,
nodeSink sdtypes.StateNodeSink, ipldSink sdtypes.IPLDSink,
logger log.Logger,
@ -250,7 +247,7 @@ func (sdb *builder) processAccounts(
if !isWatchedPathPrefix(watchedAddressesLeafPaths, it.Path()) {
continue
}
if aux.FromA() { // Node exists in the old trie
if symdiff.FromA() { // Node exists in the old trie
if it.Leaf() {
var account types.StateAccount
if err := rlp.DecodeBytes(it.LeafBlob(), &account); err != nil {
@ -259,7 +256,7 @@ func (sdb *builder) processAccounts(
leafKey := make([]byte, len(it.LeafKey()))
copy(leafKey, it.LeafKey())
if aux.CommonPath() {
if symdiff.CommonPath() {
// If B also contains this leaf node, this is the old state of an updated account.
if update, ok := updates[string(leafKey)]; ok {
update.oldRoot = account.Root
@ -284,7 +281,7 @@ func (sdb *builder) processAccounts(
return err
}
if aux.CommonPath() {
if symdiff.CommonPath() {
// If A also contains this leaf node, this is the new state of an updated account.
if update, ok := updates[string(accountW.LeafKey)]; ok {
update.new = *accountW
@ -354,7 +351,7 @@ func (sdb *builder) processAccounts(
}
}
metrics.IndexerMetrics.DifferenceIteratorCounter.Inc(int64(aux.Count()))
metrics.IndexerMetrics.DifferenceIteratorCounter.Inc(int64(symdiff.Count()))
return it.Error()
}
@ -482,10 +479,10 @@ func (sdb *builder) processStorageUpdates(
var prevBlob []byte
a, b := oldTrie.NodeIterator(nil), newTrie.NodeIterator(nil)
it, aux := utils.NewSymmetricDifferenceIterator(a, b)
it := utils.NewSymmetricDifferenceIterator(a, b)
for it.Next(true) {
if aux.FromA() {
if it.Leaf() && !aux.CommonPath() {
if it.FromA() {
if it.Leaf() && !it.CommonPath() {
// If this node's leaf key is absent from B, the storage slot was vacated.
// In that case, emit an empty "removed" storage node record.
if err := storageSink(sdtypes.StorageLeafNode{

2
go.mod
View File

@ -124,7 +124,7 @@ require (
)
replace (
github.com/cerc-io/eth-iterator-utils => git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-0.20230925184550-062eb329435f
github.com/cerc-io/eth-iterator-utils => git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-0.20230926100620-802551012643
github.com/cerc-io/eth-testing => git.vdb.to/cerc-io/eth-testing v0.3.1-0.20230925181540-2ea71042e7e0
github.com/ethereum/go-ethereum => git.vdb.to/cerc-io/plugeth v0.0.0-20230808125822-691dc334fab1
github.com/openrelayxyz/plugeth-utils => git.vdb.to/cerc-io/plugeth-utils v0.0.0-20230706160122-cd41de354c46

4
go.sum
View File

@ -1,6 +1,6 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-0.20230925184550-062eb329435f h1:sIuSkD6U7uYD/FGfvWOBViIuaHd+YhLM0Hln+4BQM10=
git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-0.20230925184550-062eb329435f/go.mod h1:Xv+d7Q11qGJcggcfxoj2JEvJJBKj0C66I6PyG5/lz9o=
git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-0.20230926100620-802551012643 h1:yJFyJgGVy1RMEJqPrTYyaB7fF1wpfx0Df5Bsunb+Lyg=
git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-0.20230926100620-802551012643/go.mod h1:Xv+d7Q11qGJcggcfxoj2JEvJJBKj0C66I6PyG5/lz9o=
git.vdb.to/cerc-io/eth-testing v0.3.1-0.20230925181540-2ea71042e7e0 h1:fWAvsSiuDqveuxwnfc8psInfLZhMqHlQnmOpOHsd8Tk=
git.vdb.to/cerc-io/eth-testing v0.3.1-0.20230925181540-2ea71042e7e0/go.mod h1:qdvpc/W1xvf2MKx3rMOqvFvYaYIHG77Z1g0lwsmw0Uk=
git.vdb.to/cerc-io/plugeth v0.0.0-20230808125822-691dc334fab1 h1:KLjxHwp9Zp7xhECccmJS00RiL+VwTuUGLU7qeIctg8g=

View File

@ -7,9 +7,9 @@ import (
"github.com/ethereum/go-ethereum/trie"
)
type symmDiffIterator struct {
type SymmDiffIterator struct {
a, b iterState // Nodes returned are those in b - a and a - b (keys only)
SymmDiffAux
SymmDiffState
}
// pairs an iterator with a cache of its valid status
@ -18,10 +18,10 @@ type iterState struct {
valid bool
}
// SymmDiffAux exposes state specific to symmetric difference iteration, which is not accessible
// SymmDiffState exposes state specific to symmetric difference iteration, which is not accessible
// from the NodeIterator interface. This includes the number of nodes seen, whether the current key
// is common to both A and B, and whether the current node is sourced from A or B.
type SymmDiffAux struct {
type SymmDiffState struct {
telackey marked this conversation as resolved Outdated

What is the advantage/disadvantage of doing this with a sort of "sidecar" type vs. extending the interface or type?

Eg, something more traditional like:

type SymDiffNodeIterator interface {
    NodeIterator
    NextComesFromA() bool 
    Position() int
    LastEqPathIndex() int
}

Just as a rule, having one type representing the internal state of another type, and passing them both around, strikes me as odd, but there may be a compelling reason to do it that way.

What is the advantage/disadvantage of doing this with a sort of "sidecar" type vs. extending the interface or type? Eg, something more traditional like: ``` type SymDiffNodeIterator interface { NodeIterator NextComesFromA() bool Position() int LastEqPathIndex() int } ``` Just as a rule, having one type representing the internal state of another type, and passing them both around, strikes me as odd, but there may be a compelling reason to do it that way.
Outdated
Review

Definitely, it's ugly. I didn't want to do this, but the problem comes when we wrap the iterators for tracking. We need to use the wrapped iterator in order for tracking to work, but we need the state exposed by the underlying one. We could extract the base with casts, but that's not much better and leaks the abstraction.

Other workarounds would be to

  1. return the base iterators from tracker.Restore along with the wrapped ones, or
  2. create a new tracker.Iterator interface which exposes the base somehow.

Option 2 means new types and extra state that's mostly not used. Option 1 is okay, though from this end the result looks about the same (we still need to pass an extra object).

Definitely, it's ugly. I didn't want to do this, but the problem comes when we wrap the iterators for tracking. We need to use the wrapped iterator in order for tracking to work, but we need the state exposed by the underlying one. We could extract the base with casts, but that's not much better and leaks the abstraction. Other workarounds would be to 1. return the base iterators from `tracker.Restore` along with the wrapped ones, or 2. create a new `tracker.Iterator` interface which exposes the base somehow. Option 2 means new types and extra state that's mostly not used. Option 1 is okay, though from this end the result looks about the same (we still need to pass an extra object).
Outdated
Review

I've done option 1, but kept the separate state type, so it's clear that only a subset of the symm-diff iterator state is actually used in that context - rather than just passing the same object as a different type.

I've done option 1, but kept the separate state type, so it's clear that only a subset of the symm-diff iterator state is actually used in that context - rather than just passing the same object as a different type.
yieldFromA bool // Whether next node comes from a
count int // Number of nodes scanned on either trie
eqPathIndex int // Count index of last pair of equal paths, to detect an updated key
@ -30,14 +30,14 @@ type SymmDiffAux struct {
// NewSymmetricDifferenceIterator constructs a trie.NodeIterator that iterates over the symmetric difference
// of elements in a and b, i.e., the elements in a that are not in b, and vice versa.
// Returns the iterator, which also exposes (through its embedded SymmDiffState) the state not accessible from the NodeIterator interface, such as the number of nodes seen.
func NewSymmetricDifferenceIterator(a, b trie.NodeIterator) (trie.NodeIterator, *SymmDiffAux) {
it := &symmDiffIterator{
func NewSymmetricDifferenceIterator(a, b trie.NodeIterator) *SymmDiffIterator {
it := &SymmDiffIterator{
a: iterState{a, true},
b: iterState{b, true},
// common paths are detected by a distance <=1 between count and this index, so we start at -2
SymmDiffAux: SymmDiffAux{eqPathIndex: -2},
SymmDiffState: SymmDiffState{eqPathIndex: -2},
}
return it, &it.SymmDiffAux
return it
}
func (st *iterState) Next(descend bool) bool {
@ -46,65 +46,65 @@ func (st *iterState) Next(descend bool) bool {
}
// FromA returns true if the current node is sourced from A.
func (it *SymmDiffAux) FromA() bool {
func (it *SymmDiffState) FromA() bool {
return it.yieldFromA
}
// CommonPath returns true if a node with the current path exists in each sub-iterator - i.e. it
// represents an updated node.
func (it *SymmDiffAux) CommonPath() bool {
func (it *SymmDiffState) CommonPath() bool {
return it.count-it.eqPathIndex <= 1
}
// Count returns the number of nodes seen.
func (it *SymmDiffAux) Count() int {
func (it *SymmDiffState) Count() int {
return it.count
}
func (it *symmDiffIterator) curr() *iterState {
func (it *SymmDiffIterator) curr() *iterState {
if it.yieldFromA {
return &it.a
}
return &it.b
}
func (it *symmDiffIterator) Hash() common.Hash {
func (it *SymmDiffIterator) Hash() common.Hash {
return it.curr().Hash()
}
func (it *symmDiffIterator) Parent() common.Hash {
func (it *SymmDiffIterator) Parent() common.Hash {
return it.curr().Parent()
}
func (it *symmDiffIterator) Leaf() bool {
func (it *SymmDiffIterator) Leaf() bool {
return it.curr().Leaf()
}
func (it *symmDiffIterator) LeafKey() []byte {
func (it *SymmDiffIterator) LeafKey() []byte {
return it.curr().LeafKey()
}
func (it *symmDiffIterator) LeafBlob() []byte {
func (it *SymmDiffIterator) LeafBlob() []byte {
return it.curr().LeafBlob()
}
func (it *symmDiffIterator) LeafProof() [][]byte {
func (it *SymmDiffIterator) LeafProof() [][]byte {
return it.curr().LeafProof()
}
func (it *symmDiffIterator) Path() []byte {
func (it *SymmDiffIterator) Path() []byte {
return it.curr().Path()
}
func (it *symmDiffIterator) NodeBlob() []byte {
func (it *SymmDiffIterator) NodeBlob() []byte {
return it.curr().NodeBlob()
}
func (it *symmDiffIterator) AddResolver(resolver trie.NodeResolver) {
func (it *SymmDiffIterator) AddResolver(resolver trie.NodeResolver) {
panic("not implemented")
}
func (it *symmDiffIterator) Next(bool) bool {
func (it *SymmDiffIterator) Next(bool) bool {
// NodeIterators start in a "pre-valid" state, so the first Next advances to a valid node.
if it.count == 0 {
if it.a.Next(true) {
@ -122,7 +122,7 @@ func (it *symmDiffIterator) Next(bool) bool {
return it.a.valid || it.b.valid
}
func (it *symmDiffIterator) seek() {
func (it *SymmDiffIterator) seek() {
// Invariants:
// - At the end of the function, the sub-iterator with the lexically lesser path
// points to the next element
@ -163,7 +163,7 @@ func (it *symmDiffIterator) seek() {
}
}
func (it *symmDiffIterator) Error() error {
func (it *SymmDiffIterator) Error() error {
if err := it.a.Error(); err != nil {
return err
}

View File

@ -45,33 +45,33 @@ func TestSymmetricDifferenceIterator(t *testing.T) {
t.Run("with no difference", func(t *testing.T) {
db := trie.NewDatabase(rawdb.NewMemoryDatabase())
triea := trie.NewEmpty(db)
di, aux := utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), triea.NodeIterator(nil))
di := utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), triea.NodeIterator(nil))
for di.Next(true) {
t.Errorf("iterator should not yield any elements")
}
assert.Equal(t, 0, aux.Count())
assert.Equal(t, 0, di.Count())
triea.MustUpdate([]byte("foo"), []byte("bar"))
di, aux = utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), triea.NodeIterator(nil))
di = utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), triea.NodeIterator(nil))
for di.Next(true) {
t.Errorf("iterator should not yield any elements")
}
// two nodes visited: the leaf (value) and its parent
assert.Equal(t, 2, aux.Count())
assert.Equal(t, 2, di.Count())
trieb := trie.NewEmpty(db)
di, aux = utils.NewSymmetricDifferenceIterator(triea.NodeIterator([]byte("jars")), trieb.NodeIterator(nil))
di = utils.NewSymmetricDifferenceIterator(triea.NodeIterator([]byte("jars")), trieb.NodeIterator(nil))
for di.Next(true) {
t.Errorf("iterator should not yield any elements")
}
assert.Equal(t, 0, aux.Count())
assert.Equal(t, 0, di.Count())
// TODO will fail until merged: https://github.com/ethereum/go-ethereum/pull/27838
// di = utils.NewSymmetricDifferenceIterator(triea.NodeIterator([]byte("food")), trieb.NodeIterator(nil))
// for di.Next(true) {
// t.Errorf("iterator should not yield any elements")
// }
// assert.Equal(t, 0, aux.Count())
// assert.Equal(t, 0, di.Count())
})
t.Run("small difference", func(t *testing.T) {
@ -82,32 +82,32 @@ func TestSymmetricDifferenceIterator(t *testing.T) {
trieb := trie.NewEmpty(dbb)
trieb.MustUpdate([]byte("foo"), []byte("bar"))
di, aux := utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil))
di := utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil))
leaves := 0
for di.Next(true) {
if di.Leaf() {
assert.False(t, aux.CommonPath())
assert.False(t, di.CommonPath())
assert.Equal(t, "foo", string(di.LeafKey()))
assert.Equal(t, "bar", string(di.LeafBlob()))
leaves++
}
}
assert.Equal(t, 1, leaves)
assert.Equal(t, 2, aux.Count())
assert.Equal(t, 2, di.Count())
trieb.MustUpdate([]byte("quux"), []byte("bars"))
di, aux = utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator([]byte("quux")))
di = utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator([]byte("quux")))
leaves = 0
for di.Next(true) {
if di.Leaf() {
assert.False(t, aux.CommonPath())
assert.False(t, di.CommonPath())
assert.Equal(t, "quux", string(di.LeafKey()))
assert.Equal(t, "bars", string(di.LeafBlob()))
leaves++
}
}
assert.Equal(t, 1, leaves)
assert.Equal(t, 1, aux.Count())
assert.Equal(t, 1, di.Count())
})
dba := trie.NewDatabase(rawdb.NewMemoryDatabase())
@ -124,20 +124,20 @@ func TestSymmetricDifferenceIterator(t *testing.T) {
onlyA := make(map[string]string)
onlyB := make(map[string]string)
var deletions, creations []string
it, aux := utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil))
it := utils.NewSymmetricDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil))
for it.Next(true) {
if !it.Leaf() {
continue
}
key, value := string(it.LeafKey()), string(it.LeafBlob())
if aux.FromA() {
if it.FromA() {
onlyA[key] = value
if !aux.CommonPath() {
if !it.CommonPath() {
deletions = append(deletions, key)
}
} else {
onlyB[key] = value
if !aux.CommonPath() {
if !it.CommonPath() {
creations = append(creations, key)
}
}
@ -205,10 +205,10 @@ func TestCompareDifferenceIterators(t *testing.T) {
pathsA = append(pathsA, itAonly.Path())
}
itSym, aux := utils.NewSymmetricDifferenceIterator(treeA.NodeIterator(nil), treeB.NodeIterator(nil))
itSym := utils.NewSymmetricDifferenceIterator(treeA.NodeIterator(nil), treeB.NodeIterator(nil))
var idxA, idxB int
for itSym.Next(true) {
if aux.FromA() {
if itSym.FromA() {
require.Equal(t, pathsA[idxA], itSym.Path())
idxA++
} else {