Add WriteStateSnapshot #15

Merged
roysc merged 23 commits from with-iterator-tracker into main 2023-09-28 03:35:47 +00:00
Showing only changes of commit 1781675b39 - Show all commits

View File

@ -8,23 +8,8 @@ import (
)
type symmDiffIterator struct {
a, b iterState // Nodes returned are those in b - a and a - b (keys only)
yieldFromA bool // Whether next node comes from a
count int // Number of nodes scanned on either trie
eqPathIndex int // Count index of last pair of equal paths, to detect an updated key
}
// NewSymmetricDifferenceIterator constructs a trie.NodeIterator that iterates over the symmetric difference
// of elements in a and b, i.e., the elements in a that are not in b, and vice versa.
// Returns the iterator, and a pointer to an integer recording the number of nodes seen.
func NewSymmetricDifferenceIterator(a, b trie.NodeIterator) (*symmDiffIterator, *int) {
it := &symmDiffIterator{
a: iterState{a, true},
b: iterState{b, true},
// common paths are detected by a distance <=1 from this index, so put it out of reach
eqPathIndex: -2,
}
return it, &it.count
a, b iterState // Nodes returned are those in b - a and a - b (keys only)
SymmDiffAux
}
// pairs an iterator with a cache of its valid status
@ -33,11 +18,49 @@ type iterState struct {
valid bool
}
// SymmDiffAux exposes state specific to symmetric difference iteration, which is not accessible
// from the NodeIterator interface. This includes the number of nodes seen, whether the current key
// is common to both A and B, and whether the current node is sourced from A or B.
type SymmDiffAux struct {
telackey marked this conversation as resolved Outdated

What is the advantage/disadvantage of doing this by a sort of "sidecar" type vs extending the interface or type.

Eg, something more traditional like:

type SymDiffNodeIterator interface {
    NodeIterator
    NextComesFromA() bool 
    Position() int
    LastEqPathIndex() int
}

Just as a rule, having one type representing the internal state of another type, and passing them both around, strikes me as odd, but there may be a compelling reason to do it that way.

What is the advantage/disadvantage of doing this by a sort of "sidecar" type vs extending the interface or type. Eg, something more traditional like: ``` type SymDiffNodeIterator interface { NodeIterator NextComesFromA() bool Position() int LastEqPathIndex() int } ``` Just as a rule, having one type representing the internal state of another type, and passing them both around, strikes me as odd, but there may be a compelling reason to do it that way.
Outdated
Review

Definitely, it's ugly. I didn't want to do this, but the problem comes when we wrap the iterators for tracking. We need to use the wrapped iterator in order for tracking to work, but we need the state exposed by the underlying one. We could extract the base with casts, but that's not much better and leaks the abstraction.

Other workarounds would be to

  1. return the base iterators from tracker.Restore along with the wrapped ones, or
  2. create a new tracker.Iterator interface which exposes the base somehow.

2 means new types and extra state that's mostly not used. 1 is okay, though from this end the result looks about the same (need to pass an extra object).

Definitely, it's ugly. I didn't want to do this, but the problem comes when we wrap the iterators for tracking. We need to use the wrapped iterator in order for tracking to work, but we need the state exposed by the underlying one. We could extract the base with casts, but that's not much better and leaks the abstraction. Other workarounds would be to 1. return the base iterators from `tracker.Restore` along with the wrapped ones, or 2. create a new `tracker.Iterator` interface which exposes the base somehow. 2 means new types and extra state that's mostly not used. 1 is okay, though from this end the result looks about the same (need to pass an extra object).
Outdated
Review

I've done option 1, but kept the separate state type, so it's clear that only a subset of the symm-diff iterator state is actually used in that context - rather than just passing the same object as a different type.

I've done option 1, but kept the separate state type, so it's clear that only a subset of the symm-diff iterator state is actually used in that context - rather than just passing the same object as a different type.
yieldFromA bool // Whether next node comes from a
count int // Number of nodes scanned on either trie
eqPathIndex int // Count index of last pair of equal paths, to detect an updated key
}
// NewSymmetricDifferenceIterator constructs a trie.NodeIterator that iterates over the symmetric difference
// of elements in a and b, i.e., the elements in a that are not in b, and vice versa.
// Returns the iterator, and a pointer to an auxiliary object for accessing the state not exposed by the NodeIterator interface recording the number of nodes seen.
func NewSymmetricDifferenceIterator(a, b trie.NodeIterator) (trie.NodeIterator, *SymmDiffAux) {
it := &symmDiffIterator{
a: iterState{a, true},
b: iterState{b, true},
// common paths are detected by a distance <=1 between count and this index, so we start at -2
SymmDiffAux: SymmDiffAux{eqPathIndex: -2},
}
return it, &it.SymmDiffAux
}
func (st *iterState) Next(descend bool) bool {
st.valid = st.NodeIterator.Next(descend)
return st.valid
}
// FromA returns true if the current node is sourced from A.
func (it *SymmDiffAux) FromA() bool {
return it.yieldFromA
}
// CommonPath returns true if a node with the current path exists in each sub-iterator - i.e. it
// represents an updated node.
func (it *SymmDiffAux) CommonPath() bool {
return it.count-it.eqPathIndex <= 1
}
// Count returns the number of nodes seen.
func (it *SymmDiffAux) Count() int {
return it.count
}
func (it *symmDiffIterator) curr() *iterState {
if it.yieldFromA {
return &it.a
@ -45,17 +68,6 @@ func (it *symmDiffIterator) curr() *iterState {
return &it.b
}
// FromA returns true if the current node is sourced from A.
func (it *symmDiffIterator) FromA() bool {
return it.yieldFromA
}
// CommonPath returns true if a node with the current path exists in each sub-iterator - i.e. it
// represents an updated node.
func (it *symmDiffIterator) CommonPath() bool {
return it.count-it.eqPathIndex <= 1
}
func (it *symmDiffIterator) Hash() common.Hash {
return it.curr().Hash()
}