Add tracker interface and tests #3
39
internal/test_helper.go
Normal file
39
internal/test_helper.go
Normal file
@ -0,0 +1,39 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/cerc-io/eth-testing/chaindata/small2"
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/state"
|
||||
"github.com/ethereum/go-ethereum/ethdb"
|
||||
)
|
||||
|
||||
var (
|
||||
FixtureNodePaths = small2.Block1_StateNodePaths
|
||||
FixtureLeafKeys = small2.Block1_StateNodeLeafKeys
|
||||
)
|
||||
|
||||
func OpenFixtureTrie(t *testing.T, height uint64) (state.Trie, ethdb.Database) {
|
||||
data := small2.ChainData
|
||||
kvdb, ldberr := rawdb.NewLevelDBDatabase(data.ChainData, 1024, 256, t.Name(), true)
|
||||
if ldberr != nil {
|
||||
t.Fatal(ldberr)
|
||||
}
|
||||
edb, err := rawdb.NewDatabaseWithFreezer(kvdb, data.Ancient, t.Name(), true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
hash := rawdb.ReadCanonicalHash(edb, height)
|
||||
header := rawdb.ReadHeader(edb, hash, height)
|
||||
if header == nil {
|
||||
t.Fatalf("unable to read canonical header at height %d", height)
|
||||
}
|
||||
sdb := state.NewDatabase(edb)
|
||||
tree, err := sdb.OpenTrie(header.Root)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return tree, edb
|
||||
}
|
@ -5,11 +5,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||
"github.com/ethereum/go-ethereum/core/state"
|
||||
|
||||
iter "github.com/cerc-io/eth-iterator-utils"
|
||||
fixture "github.com/cerc-io/eth-testing/chaindata/medium"
|
||||
"github.com/cerc-io/eth-iterator-utils/internal"
|
||||
)
|
||||
|
||||
func TestMakePaths(t *testing.T) {
|
||||
@ -24,30 +21,8 @@ func TestMakePaths(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestIterator(t *testing.T) {
|
||||
kvdb, ldberr := rawdb.NewLevelDBDatabase(fixture.ChainDataPath, 1024, 256, "vdb-geth", true)
|
||||
if ldberr != nil {
|
||||
t.Fatal(ldberr)
|
||||
}
|
||||
edb, err := rawdb.NewDatabaseWithFreezer(kvdb, fixture.AncientDataPath, "vdb-geth", true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer edb.Close()
|
||||
|
||||
height := uint64(1)
|
||||
hash := rawdb.ReadCanonicalHash(edb, height)
|
||||
header := rawdb.ReadHeader(edb, hash, height)
|
||||
if header == nil {
|
||||
t.Fatalf("unable to read canonical header at height %d", height)
|
||||
}
|
||||
sdb := state.NewDatabase(edb)
|
||||
tree, err := sdb.OpenTrie(header.Root)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tree, edb := internal.OpenFixtureTrie(t, 1)
|
||||
t.Cleanup(func() { edb.Close() })
|
||||
|
||||
t.Run("in bounds", func(t *testing.T) {
|
||||
type testCase struct {
|
||||
@ -79,7 +54,7 @@ func TestIterator(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("trie is covered", func(t *testing.T) {
|
||||
allPaths := fixture.Block1_Paths
|
||||
allPaths := internal.FixtureNodePaths
|
||||
cases := []uint{1, 2, 4, 8, 16, 32}
|
||||
runCase := func(t *testing.T, nbins uint) {
|
||||
iters := iter.SubtrieIterators(tree.NodeIterator, nbins)
|
||||
|
@ -1,12 +1,10 @@
|
||||
package tracker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"sync"
|
||||
|
||||
"github.com/ethereum/go-ethereum/log"
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
@ -14,23 +12,46 @@ import (
|
||||
iter "github.com/cerc-io/eth-iterator-utils"
|
||||
)
|
||||
|
||||
type Tracker struct {
|
||||
recoveryFile string
|
||||
|
||||
startChan chan *Iterator
|
||||
stopChan chan *Iterator
|
||||
started map[*Iterator]struct{}
|
||||
stopped []*Iterator
|
||||
running bool
|
||||
type Tracker interface {
|
||||
Restore(iter.IteratorConstructor) ([]trie.NodeIterator, error)
|
||||
Tracked(trie.NodeIterator) trie.NodeIterator
|
||||
CloseAndSave() error
|
||||
}
|
||||
|
||||
type Iterator struct {
|
||||
trie.NodeIterator
|
||||
tracker *Tracker
|
||||
var _ Tracker = &trackerAdaptor{}
|
||||
|
||||
// Wrap the tracker state to only expose NodeIterators
|
||||
type trackerAdaptor struct {
|
||||
*TrackerImpl
|
||||
}
|
||||
|
||||
func New(file string, bufsize uint) Tracker {
|
||||
return Tracker{
|
||||
// New creates a new tracker which saves state to a given file. bufsize sets the size of the
|
||||
// channel buffers used internally to manage tracking. Note that passing a bufsize smaller than the expected
|
||||
// number of concurrent iterators could lead to deadlock.
|
||||
func New(file string, bufsize uint) *trackerAdaptor {
|
||||
return &trackerAdaptor{NewImpl(file, bufsize)}
|
||||
}
|
||||
|
||||
func (tr *trackerAdaptor) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIterator, error) {
|
||||
its, err := tr.TrackerImpl.Restore(makeIterator)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ret []trie.NodeIterator
|
||||
for _, it := range its {
|
||||
ret = append(ret, it)
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (tr *trackerAdaptor) Tracked(it trie.NodeIterator) trie.NodeIterator {
|
||||
var trick any = tr.TrackerImpl.Tracked(it)
|
||||
return trick.(trie.NodeIterator)
|
||||
}
|
||||
|
||||
func NewImpl(file string, bufsize uint) *TrackerImpl {
|
||||
return &TrackerImpl{
|
||||
recoveryFile: file,
|
||||
startChan: make(chan *Iterator, bufsize),
|
||||
stopChan: make(chan *Iterator, bufsize),
|
||||
@ -39,34 +60,47 @@ func New(file string, bufsize uint) Tracker {
|
||||
}
|
||||
}
|
||||
|
||||
func (tr *Tracker) CaptureSignal(cancelCtx context.CancelFunc) {
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
type TrackerImpl struct {
|
||||
recoveryFile string
|
||||
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
sig := <-sigChan
|
||||
log.Error("Signal received (%v), stopping", "signal", sig)
|
||||
// Cancel context on receiving a signal. On cancellation, all tracked iterators complete
|
||||
// processing of their current node before stopping.
|
||||
cancelCtx()
|
||||
}()
|
||||
startChan chan *Iterator
|
||||
stopChan chan *Iterator
|
||||
started map[*Iterator]struct{}
|
||||
stopped []*Iterator
|
||||
running bool
|
||||
sync.RWMutex // guards closing of the tracker
|
||||
}
|
||||
|
||||
type Iterator struct {
|
||||
trie.NodeIterator
|
||||
tracker *TrackerImpl
|
||||
}
|
||||
|
||||
// Tracked wraps an iterator in a Iterator. This should not be called once halts are possible.
|
||||
func (tr *Tracker) Tracked(it trie.NodeIterator) (ret *Iterator) {
|
||||
ret = &Iterator{it, tr}
|
||||
func (tr *TrackerImpl) Tracked(it trie.NodeIterator) *Iterator {
|
||||
ret := &Iterator{it, tr}
|
||||
tr.startChan <- ret
|
||||
return
|
||||
return ret
|
||||
}
|
||||
|
||||
// StopIterator explicitly stops an iterator
|
||||
func (tr *Tracker) StopIterator(it *Iterator) {
|
||||
tr.stopChan <- it
|
||||
func (tr *TrackerImpl) StopIterator(it *Iterator) {
|
||||
tr.RLock()
|
||||
defer tr.RUnlock()
|
||||
if tr.running {
|
||||
tr.stopChan <- it
|
||||
}
|
||||
}
|
||||
|
||||
// dumps iterator path and bounds to a text file so it can be restored later
|
||||
func (tr *Tracker) dump() error {
|
||||
log.Debug("Dumping recovery state", "to", tr.recoveryFile)
|
||||
// Save dumps iterator path and bounds to a text file so it can be restored later.
|
||||
func (tr *TrackerImpl) Save() error {
|
||||
log.Debug("Saving recovery state", "to", tr.recoveryFile)
|
||||
|
||||
// if the tracker state is empty, erase any existing recovery file
|
||||
if len(tr.started) == 0 {
|
||||
return tr.removeRecoveryFile()
|
||||
}
|
||||
|
||||
var rows [][]string
|
||||
for it := range tr.started {
|
||||
_, endPath := it.Bounds()
|
||||
@ -86,10 +120,18 @@ func (tr *Tracker) dump() error {
|
||||
return out.WriteAll(rows)
|
||||
}
|
||||
|
||||
func (tr *TrackerImpl) removeRecoveryFile() error {
|
||||
err := os.Remove(tr.recoveryFile)
|
||||
if os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Restore attempts to read iterator state from the recovery file.
|
||||
// If the file doesn't exist, returns an empty slice with no error.
|
||||
// Restored iterators are constructed in the same order as in the returned slice.
|
||||
func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIterator, error) {
|
||||
// Restored iterators are constructed in the same order they appear in the returned slice.
|
||||
func (tr *TrackerImpl) Restore(makeIterator iter.IteratorConstructor) ([]*Iterator, error) {
|
||||
file, err := os.Open(tr.recoveryFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
@ -107,7 +149,7 @@ func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIt
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ret []trie.NodeIterator
|
||||
var ret []*Iterator
|
||||
for _, row := range rows {
|
||||
// pick up where each recovered iterator left off
|
||||
var recoveredPath []byte
|
||||
@ -134,43 +176,39 @@ func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIt
|
||||
ret = append(ret, tr.Tracked(boundIt))
|
||||
}
|
||||
|
||||
log.Debug("Restored iterators", "count", len(ret))
|
||||
return ret, nil
|
||||
return ret, tr.removeRecoveryFile()
|
||||
}
|
||||
|
||||
func (tr *Tracker) HaltAndDump() error {
|
||||
// CloseAndSave stops all tracked iterators and dumps their state to a file.
|
||||
// This closes the tracker, so adding a new iterator will fail.
|
||||
roysc marked this conversation as resolved
Outdated
|
||||
func (tr *TrackerImpl) CloseAndSave() error {
|
||||
tr.Lock()
|
||||
tr.running = false
|
||||
close(tr.stopChan)
|
||||
tr.Unlock()
|
||||
|
||||
// drain any pending iterators
|
||||
close(tr.startChan)
|
||||
for start := range tr.startChan {
|
||||
tr.started[start] = struct{}{}
|
||||
}
|
||||
close(tr.stopChan)
|
||||
for stop := range tr.stopChan {
|
||||
tr.stopped = append(tr.stopped, stop)
|
||||
}
|
||||
|
||||
for _, stop := range tr.stopped {
|
||||
delete(tr.started, stop)
|
||||
}
|
||||
|
||||
if len(tr.started) == 0 {
|
||||
// if the tracker state is empty, erase any existing recovery file
|
||||
err := os.Remove(tr.recoveryFile)
|
||||
if os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
return tr.dump()
|
||||
return tr.Save()
|
||||
}
|
||||
|
||||
// Next advances the iterator, notifying its owning tracker when it finishes.
|
||||
func (it *Iterator) Next(descend bool) bool {
|
||||
ret := it.NodeIterator.Next(descend)
|
||||
|
||||
if !ret {
|
||||
roysc marked this conversation as resolved
Outdated
telackey
commented
Why not call Why not call `it.StopIterator()`? I am always nervous about code that requires a precise locking scheme being duplicated multiple places.
roysc
commented
That's fair - I don't think we actually need StopIterator any more, so I will just delete it for now. That's fair - I don't think we actually need StopIterator any more, so I will just delete it for now.
|
||||
it.tracker.RLock()
|
||||
defer it.tracker.RUnlock()
|
||||
if it.tracker.running {
|
||||
it.tracker.stopChan <- it
|
||||
} else {
|
||||
@ -188,9 +226,9 @@ func (it *Iterator) Bounds() ([]byte, []byte) {
|
||||
}
|
||||
|
||||
// Rewinds to the path of the previous (pre-order) node:
|
||||
// If the last byte of the path is zero, pops it. Otherwise, decrements it
|
||||
// and pads with 0xF to 64 bytes (e.g. [1] => [0 f f f ...]).
|
||||
// Returns the passed path (which is also modified in place)
|
||||
// If the last byte of the path is zero, pops it (e.g. [1 0] => [1]).
|
||||
// Otherwise, decrements it and pads with 0xF to 64 bytes (e.g. [1] => [0 f f f ...]).
|
||||
// The passed slice is not modified.
|
||||
func rewindPath(path []byte) []byte {
|
||||
if len(path) == 0 {
|
||||
return path
|
||||
|
69
tracker/tracker_test.go
Normal file
69
tracker/tracker_test.go
Normal file
@ -0,0 +1,69 @@
|
||||
package tracker_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/cerc-io/eth-iterator-utils/internal"
|
||||
"github.com/cerc-io/eth-iterator-utils/tracker"
|
||||
)
|
||||
|
||||
func TestTracker(t *testing.T) {
|
||||
NumIters := uint(1)
|
||||
recoveryFile := filepath.Join(t.TempDir(), "tracker_test.csv")
|
||||
|
||||
tree, edb := internal.OpenFixtureTrie(t, 1)
|
||||
t.Cleanup(func() { edb.Close() })
|
||||
|
||||
// traverse trie and trigger error at some intermediate point
|
||||
N := len(internal.FixtureNodePaths)
|
||||
interrupt := rand.Intn(N/2) + N/4
|
||||
failedTraverse := func() []byte {
|
||||
tr := tracker.New(recoveryFile, NumIters)
|
||||
defer tr.CloseAndSave()
|
||||
|
||||
var prevPath []byte
|
||||
count := 0
|
||||
for it := tr.Tracked(tree.NodeIterator(nil)); it.Next(true); {
|
||||
if count == interrupt {
|
||||
return prevPath // tracker rewinds one node to prevent gaps
|
||||
}
|
||||
prevPath = it.Path()
|
||||
count++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
failedAt := failedTraverse()
|
||||
if failedAt == nil {
|
||||
t.Fatal("traversal wasn't interrupted")
|
||||
}
|
||||
|
||||
if !fileExists(recoveryFile) {
|
||||
t.Fatal("recovery file wasn't created")
|
||||
}
|
||||
|
||||
tr := tracker.New(recoveryFile, NumIters)
|
||||
its, err := tr.Restore(tree.NodeIterator)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uint(len(its)) != NumIters {
|
||||
t.Fatalf("expected to restore %d iterators, got %d", NumIters, len(its))
|
||||
}
|
||||
if !bytes.Equal(failedAt, its[0].Path()) {
|
||||
t.Fatalf("iterator restored to wrong position: expected %v, got %v", failedAt, its[0].Path())
|
||||
}
|
||||
|
||||
if fileExists(recoveryFile) {
|
||||
t.Fatal("recovery file wasn't removed")
|
||||
}
|
||||
}
|
||||
|
||||
func fileExists(file string) bool {
|
||||
_, err := os.Stat(file)
|
||||
return !os.IsNotExist(err)
|
||||
}
|
Loading…
Reference in New Issue
Block a user
Is it explicit anywhere that you must call CloseAndSave in order to close the channel, set running to false, etc?
What happens if you don't call that, but the iterator completes?
If we need to call it, I think the pattern:
Needs to be documented explicitly somewhere (unless I am just missing something).
If it's not called, it should just be sort of a leak. The channels won't be flushed or saved, but once all its iterators die, the tracker will still be gc'd. No critical consequences that I foresee, but I will document it.