Add tracker interface and tests #3

Merged
roysc merged 7 commits from tracker-interface into main 2023-09-26 11:34:42 +00:00
4 changed files with 205 additions and 84 deletions
Showing only changes of commit a665825d3c

39
internal/test_helper.go Normal file
View File

@ -0,0 +1,39 @@
package internal

import (
	"testing"

	"github.com/cerc-io/eth-testing/chaindata/small2"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/state"
	"github.com/ethereum/go-ethereum/ethdb"
)

var (
	FixtureNodePaths = small2.Block1_StateNodePaths
	FixtureLeafKeys  = small2.Block1_StateNodeLeafKeys
)

func OpenFixtureTrie(t *testing.T, height uint64) (state.Trie, ethdb.Database) {
	data := small2.ChainData
	kvdb, ldberr := rawdb.NewLevelDBDatabase(data.ChainData, 1024, 256, t.Name(), true)
	if ldberr != nil {
		t.Fatal(ldberr)
	}
	edb, err := rawdb.NewDatabaseWithFreezer(kvdb, data.Ancient, t.Name(), true)
	if err != nil {
		t.Fatal(err)
	}
	hash := rawdb.ReadCanonicalHash(edb, height)
	header := rawdb.ReadHeader(edb, hash, height)
	if header == nil {
		t.Fatalf("unable to read canonical header at height %d", height)
	}
	sdb := state.NewDatabase(edb)
	tree, err := sdb.OpenTrie(header.Root)
	if err != nil {
		t.Fatal(err)
	}
	return tree, edb
}
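
For context, a minimal sketch of how a test might call this helper, mirroring the usage in the updated iterator test below. The test name is illustrative; only `OpenFixtureTrie` and the fixture variables above are from this PR:

```go
package internal_test

import (
	"testing"

	"github.com/cerc-io/eth-iterator-utils/internal"
)

// TestFixtureTrieWalk is illustrative only: open the block-1 fixture trie and
// walk every node, closing the backing database when the test finishes.
func TestFixtureTrieWalk(t *testing.T) {
	tree, edb := internal.OpenFixtureTrie(t, 1)
	t.Cleanup(func() { edb.Close() })

	count := 0
	for it := tree.NodeIterator(nil); it.Next(true); {
		count++
	}
	if count == 0 {
		t.Fatal("expected a non-empty state trie")
	}
}
```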

View File

@ -5,11 +5,8 @@ import (
 	"fmt"
 	"testing"
 
-	"github.com/ethereum/go-ethereum/core/rawdb"
-	"github.com/ethereum/go-ethereum/core/state"
-
 	iter "github.com/cerc-io/eth-iterator-utils"
-	fixture "github.com/cerc-io/eth-testing/chaindata/medium"
+	"github.com/cerc-io/eth-iterator-utils/internal"
 )
 
 func TestMakePaths(t *testing.T) {
@ -24,30 +21,8 @@ func TestMakePaths(t *testing.T) {
 }
 
 func TestIterator(t *testing.T) {
-	kvdb, ldberr := rawdb.NewLevelDBDatabase(fixture.ChainDataPath, 1024, 256, "vdb-geth", true)
-	if ldberr != nil {
-		t.Fatal(ldberr)
-	}
-	edb, err := rawdb.NewDatabaseWithFreezer(kvdb, fixture.AncientDataPath, "vdb-geth", true)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer edb.Close()
-
-	height := uint64(1)
-	hash := rawdb.ReadCanonicalHash(edb, height)
-	header := rawdb.ReadHeader(edb, hash, height)
-	if header == nil {
-		t.Fatalf("unable to read canonical header at height %d", height)
-	}
-	sdb := state.NewDatabase(edb)
-	tree, err := sdb.OpenTrie(header.Root)
-	if err != nil {
-		t.Fatal(err)
-	}
+	tree, edb := internal.OpenFixtureTrie(t, 1)
+	t.Cleanup(func() { edb.Close() })
 
 	t.Run("in bounds", func(t *testing.T) {
 		type testCase struct {
@ -79,7 +54,7 @@ func TestIterator(t *testing.T) {
 	})
 
 	t.Run("trie is covered", func(t *testing.T) {
-		allPaths := fixture.Block1_Paths
+		allPaths := internal.FixtureNodePaths
 		cases := []uint{1, 2, 4, 8, 16, 32}
 		runCase := func(t *testing.T, nbins uint) {
 			iters := iter.SubtrieIterators(tree.NodeIterator, nbins)

View File

@ -1,12 +1,10 @@
 package tracker
 
 import (
-	"context"
 	"encoding/csv"
 	"fmt"
 	"os"
-	"os/signal"
-	"syscall"
+	"sync"
 
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/trie"
@ -14,23 +12,46 @@ import (
 	iter "github.com/cerc-io/eth-iterator-utils"
 )
 
-type Tracker struct {
-	recoveryFile string
-
-	startChan chan *Iterator
-	stopChan  chan *Iterator
-	started   map[*Iterator]struct{}
-	stopped   []*Iterator
-	running   bool
+type Tracker interface {
+	Restore(iter.IteratorConstructor) ([]trie.NodeIterator, error)
+	Tracked(trie.NodeIterator) trie.NodeIterator
+	CloseAndSave() error
 }
 
-type Iterator struct {
-	trie.NodeIterator
-	tracker *Tracker
+var _ Tracker = &trackerAdaptor{}
+
+// Wrap the tracker state to only expose NodeIterators
+type trackerAdaptor struct {
+	*TrackerImpl
 }
 
-func New(file string, bufsize uint) Tracker {
-	return Tracker{
+// New creates a new tracker which saves state to a given file. bufsize sets the size of the
+// channel buffers used internally to manage tracking. Note that passing a bufsize smaller than the expected
+// number of concurrent iterators could lead to deadlock.
+func New(file string, bufsize uint) *trackerAdaptor {
+	return &trackerAdaptor{NewImpl(file, bufsize)}
+}
+
+func (tr *trackerAdaptor) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIterator, error) {
+	its, err := tr.TrackerImpl.Restore(makeIterator)
+	if err != nil {
+		return nil, err
+	}
+	var ret []trie.NodeIterator
+	for _, it := range its {
+		ret = append(ret, it)
+	}
+	return ret, nil
+}
+
+func (tr *trackerAdaptor) Tracked(it trie.NodeIterator) trie.NodeIterator {
+	var trick any = tr.TrackerImpl.Tracked(it)
+	return trick.(trie.NodeIterator)
+}
+
+func NewImpl(file string, bufsize uint) *TrackerImpl {
+	return &TrackerImpl{
 		recoveryFile: file,
 		startChan:    make(chan *Iterator, bufsize),
 		stopChan:     make(chan *Iterator, bufsize),
@ -39,34 +60,47 @@ func New(file string, bufsize uint) Tracker {
 	}
 }
 
-func (tr *Tracker) CaptureSignal(cancelCtx context.CancelFunc) {
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
-	go func() {
-		sig := <-sigChan
-		log.Error("Signal received (%v), stopping", "signal", sig)
-		// Cancel context on receiving a signal. On cancellation, all tracked iterators complete
-		// processing of their current node before stopping.
-		cancelCtx()
-	}()
+type TrackerImpl struct {
+	recoveryFile string
+
+	startChan chan *Iterator
+	stopChan  chan *Iterator
+	started   map[*Iterator]struct{}
+	stopped   []*Iterator
+	running   bool
+
+	sync.RWMutex // guards closing of the tracker
+}
+
+type Iterator struct {
+	trie.NodeIterator
+	tracker *TrackerImpl
 }
 
 // Tracked wraps an iterator in a Iterator. This should not be called once halts are possible.
-func (tr *Tracker) Tracked(it trie.NodeIterator) (ret *Iterator) {
-	ret = &Iterator{it, tr}
+func (tr *TrackerImpl) Tracked(it trie.NodeIterator) *Iterator {
+	ret := &Iterator{it, tr}
 	tr.startChan <- ret
-	return
+	return ret
 }
 
 // StopIterator explicitly stops an iterator
-func (tr *Tracker) StopIterator(it *Iterator) {
-	tr.stopChan <- it
+func (tr *TrackerImpl) StopIterator(it *Iterator) {
+	tr.RLock()
+	defer tr.RUnlock()
+	if tr.running {
+		tr.stopChan <- it
+	}
 }
 
-// dumps iterator path and bounds to a text file so it can be restored later
-func (tr *Tracker) dump() error {
-	log.Debug("Dumping recovery state", "to", tr.recoveryFile)
+// Save dumps iterator path and bounds to a text file so it can be restored later.
+func (tr *TrackerImpl) Save() error {
+	log.Debug("Saving recovery state", "to", tr.recoveryFile)
+	// if the tracker state is empty, erase any existing recovery file
+	if len(tr.started) == 0 {
+		return tr.removeRecoveryFile()
+	}
 
 	var rows [][]string
 	for it := range tr.started {
 		_, endPath := it.Bounds()
@ -86,10 +120,18 @@ func (tr *Tracker) dump() error {
 	return out.WriteAll(rows)
 }
 
+func (tr *TrackerImpl) removeRecoveryFile() error {
+	err := os.Remove(tr.recoveryFile)
+	if os.IsNotExist(err) {
+		err = nil
+	}
+	return err
+}
+
 // Restore attempts to read iterator state from the recovery file.
 // If the file doesn't exist, returns an empty slice with no error.
-// Restored iterators are constructed in the same order as in the returned slice.
-func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIterator, error) {
+// Restored iterators are constructed in the same order they appear in the returned slice.
+func (tr *TrackerImpl) Restore(makeIterator iter.IteratorConstructor) ([]*Iterator, error) {
 	file, err := os.Open(tr.recoveryFile)
 	if err != nil {
 		if os.IsNotExist(err) {
@ -107,7 +149,7 @@ func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIt
 		return nil, err
 	}
 
-	var ret []trie.NodeIterator
+	var ret []*Iterator
 	for _, row := range rows {
 		// pick up where each recovered iterator left off
 		var recoveredPath []byte
@ -134,43 +176,39 @@ func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) ([]trie.NodeIt
 		ret = append(ret, tr.Tracked(boundIt))
 	}
 
-	log.Debug("Restored iterators", "count", len(ret))
-	return ret, nil
+	return ret, tr.removeRecoveryFile()
 }
 
-func (tr *Tracker) HaltAndDump() error {
+// CloseAndSave stops all tracked iterators and dumps their state to a file.
+// This closes the tracker, so adding a new iterator will fail.
roysc marked this conversation as resolved.

Review comment:
Is it explicit anywhere that you must call CloseAndSave in order to close the channel, set running to false, etc?

What happens if you don't call that, but the iterator completes?

If we need to call it, I think the pattern:

    tr := tracker.New(recoveryFile, NumIters)
    defer tr.CloseAndSave()  // <--- You must do this!

needs to be documented explicitly somewhere (unless I am just missing something).

Reply:
If it's not called, it should just be sort of a leak. The channels won't be flushed or saved, but once all its iterators die, the tracker will still be gc'd. No critical consequences that I foresee, but I will document it.
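
To make the pattern from this thread concrete, here is a hedged sketch of the intended lifecycle. `traverseWithTracker` and its arguments are hypothetical; only `tracker.New`, `Tracked`, and `CloseAndSave` come from this PR:

```go
package example

import (
	"github.com/cerc-io/eth-iterator-utils/tracker"
	"github.com/ethereum/go-ethereum/core/state"
)

// traverseWithTracker illustrates the pattern discussed above:
// always pair tracker.New with a deferred CloseAndSave.
func traverseWithTracker(recoveryFile string, tree state.Trie) {
	tr := tracker.New(recoveryFile, 1)
	// Without this deferred call the start/stop channels are never drained and no
	// recovery file is written; per the reply above it is only a "leak", but the
	// saved traversal position is lost if the process is interrupted.
	defer tr.CloseAndSave()

	for it := tr.Tracked(tree.NodeIterator(nil)); it.Next(true); {
		_ = it.Path() // process the current node here
	}
}
```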
+func (tr *TrackerImpl) CloseAndSave() error {
+	tr.Lock()
 	tr.running = false
+	close(tr.stopChan)
+	tr.Unlock()
 
 	// drain any pending iterators
 	close(tr.startChan)
 	for start := range tr.startChan {
 		tr.started[start] = struct{}{}
 	}
-	close(tr.stopChan)
 	for stop := range tr.stopChan {
 		tr.stopped = append(tr.stopped, stop)
 	}
 
 	for _, stop := range tr.stopped {
 		delete(tr.started, stop)
 	}
-
-	if len(tr.started) == 0 {
-		// if the tracker state is empty, erase any existing recovery file
-		err := os.Remove(tr.recoveryFile)
-		if os.IsNotExist(err) {
-			err = nil
-		}
-		return err
-	}
-
-	return tr.dump()
+	return tr.Save()
 }
 
+// Next advances the iterator, notifying its owning tracker when it finishes.
 func (it *Iterator) Next(descend bool) bool {
 	ret := it.NodeIterator.Next(descend)
 	if !ret {
roysc marked this conversation as resolved.

Review comment:
Why not call it.StopIterator()? I am always nervous about code that requires a precise locking scheme being duplicated multiple places.

Reply:
That's fair - I don't think we actually need StopIterator any more, so I will just delete it for now.
+		it.tracker.RLock()
+		defer it.tracker.RUnlock()
 		if it.tracker.running {
 			it.tracker.stopChan <- it
 		} else {
@ -188,9 +226,9 @@ func (it *Iterator) Bounds() ([]byte, []byte) {
 }
 
 // Rewinds to the path of the previous (pre-order) node:
-// If the last byte of the path is zero, pops it. Otherwise, decrements it
-// and pads with 0xF to 64 bytes (e.g. [1] => [0 f f f ...]).
-// Returns the passed path (which is also modified in place)
+// If the last byte of the path is zero, pops it (e.g. [1 0] => [1]).
+// Otherwise, decrements it and pads with 0xF to 64 bytes (e.g. [1] => [0 f f f ...]).
+// The passed slice is not modified.
 func rewindPath(path []byte) []byte {
 	if len(path) == 0 {
 		return path
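
The rewindPath doc comment above describes the rewind rule in words; the following is a standalone sketch of that behavior, inferred from the comment rather than copied from the library's implementation:

```go
// rewindPathSketch approximates the rule described above: paths are nibble
// slices, [1 0] rewinds to [1], and [1] rewinds to [0 f f f ...] padded to
// 64 nibbles. The input slice is left unmodified.
func rewindPathSketch(path []byte) []byte {
	if len(path) == 0 {
		return path
	}
	out := append([]byte(nil), path...) // copy, so the caller's slice is untouched
	last := len(out) - 1
	if out[last] == 0 {
		return out[:last] // pop a trailing zero nibble
	}
	out[last]-- // otherwise decrement the last nibble...
	for len(out) < 64 {
		out = append(out, 0xf) // ...and pad with 0xf to the maximum path length
	}
	return out
}
```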

69
tracker/tracker_test.go Normal file
View File

@ -0,0 +1,69 @@
package tracker_test

import (
	"bytes"
	"math/rand"
	"os"
	"path/filepath"
	"testing"

	"github.com/cerc-io/eth-iterator-utils/internal"
	"github.com/cerc-io/eth-iterator-utils/tracker"
)

func TestTracker(t *testing.T) {
	NumIters := uint(1)
	recoveryFile := filepath.Join(t.TempDir(), "tracker_test.csv")

	tree, edb := internal.OpenFixtureTrie(t, 1)
	t.Cleanup(func() { edb.Close() })

	// traverse trie and trigger error at some intermediate point
	N := len(internal.FixtureNodePaths)
	interrupt := rand.Intn(N/2) + N/4
	failedTraverse := func() []byte {
		tr := tracker.New(recoveryFile, NumIters)
		defer tr.CloseAndSave()
		var prevPath []byte
		count := 0
		for it := tr.Tracked(tree.NodeIterator(nil)); it.Next(true); {
			if count == interrupt {
				return prevPath // tracker rewinds one node to prevent gaps
			}
			prevPath = it.Path()
			count++
		}
		return nil
	}

	failedAt := failedTraverse()
	if failedAt == nil {
		t.Fatal("traversal wasn't interrupted")
	}
	if !fileExists(recoveryFile) {
		t.Fatal("recovery file wasn't created")
	}

	tr := tracker.New(recoveryFile, NumIters)
	its, err := tr.Restore(tree.NodeIterator)
	if err != nil {
		t.Fatal(err)
	}
	if uint(len(its)) != NumIters {
		t.Fatalf("expected to restore %d iterators, got %d", NumIters, len(its))
	}
	if !bytes.Equal(failedAt, its[0].Path()) {
		t.Fatalf("iterator restored to wrong position: expected %v, got %v", failedAt, its[0].Path())
	}
	if fileExists(recoveryFile) {
		t.Fatal("recovery file wasn't removed")
	}
}

func fileExists(file string) bool {
	_, err := os.Stat(file)
	return !os.IsNotExist(err)
}
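
Rounding out the test above, a hedged sketch of how a caller might resume traversal after Restore. `resumeTraversal` and its parameters are illustrative; per the test, Restore positions each iterator at the path saved on interruption and removes the recovery file:

```go
package tracker_test

import (
	"github.com/cerc-io/eth-iterator-utils/tracker"
	"github.com/ethereum/go-ethereum/core/state"
)

// resumeTraversal is a hypothetical continuation of the recovery flow exercised
// by TestTracker: restore the saved iterators and finish walking the trie.
func resumeTraversal(recoveryFile string, tree state.Trie, numIters uint) error {
	tr := tracker.New(recoveryFile, numIters)
	defer tr.CloseAndSave()

	its, err := tr.Restore(tree.NodeIterator)
	if err != nil {
		return err
	}
	for _, it := range its {
		// each restored iterator resumes from the path saved at interruption
		for it.Next(true) {
			// process the remaining nodes here
		}
	}
	return nil
}
```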