05aeeab581
* snapshotter ignores nodes that are not along the paths derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicitly enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
220 lines
5.4 KiB
Go
220 lines
5.4 KiB
Go
package snapshot
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/csv"
|
|
"fmt"
|
|
"os"
|
|
"os/signal"
|
|
"syscall"
|
|
|
|
"github.com/ethereum/go-ethereum/core/state"
|
|
"github.com/ethereum/go-ethereum/trie"
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
iter "github.com/vulcanize/go-eth-state-node-iterator"
|
|
)
|
|
|
|
type trackedIter struct {
|
|
trie.NodeIterator
|
|
tracker *iteratorTracker
|
|
|
|
seekedPath []byte // latest path seeked from the tracked iterator
|
|
endPath []byte // endPath for the tracked iterator
|
|
}
|
|
|
|
func (it *trackedIter) Next(descend bool) bool {
|
|
ret := it.NodeIterator.Next(descend)
|
|
|
|
if !ret {
|
|
if it.tracker.running {
|
|
it.tracker.stopChan <- it
|
|
} else {
|
|
log.Errorf("iterator stopped after tracker halted: path=%x", it.Path())
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
type iteratorTracker struct {
|
|
recoveryFile string
|
|
|
|
startChan chan *trackedIter
|
|
stopChan chan *trackedIter
|
|
started map[*trackedIter]struct{}
|
|
stopped []*trackedIter
|
|
running bool
|
|
}
|
|
|
|
func newTracker(file string, buf int) iteratorTracker {
|
|
return iteratorTracker{
|
|
recoveryFile: file,
|
|
startChan: make(chan *trackedIter, buf),
|
|
stopChan: make(chan *trackedIter, buf),
|
|
started: map[*trackedIter]struct{}{},
|
|
running: true,
|
|
}
|
|
}
|
|
|
|
func (tr *iteratorTracker) captureSignal(cancelCtx context.CancelFunc) {
|
|
sigChan := make(chan os.Signal, 1)
|
|
|
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
|
go func() {
|
|
sig := <-sigChan
|
|
log.Errorf("Signal received (%v), stopping", sig)
|
|
// cancel context on receiving a signal
|
|
// on ctx cancellation, all the iterators complete processing of their current node before stopping
|
|
cancelCtx()
|
|
}()
|
|
}
|
|
|
|
// Wraps an iterator in a trackedIter. This should not be called once halts are possible.
|
|
func (tr *iteratorTracker) tracked(it trie.NodeIterator, recoveredPath []byte) (ret *trackedIter) {
|
|
// create seeked path of max capacity (65)
|
|
iterSeekedPath := make([]byte, 0, 65)
|
|
// intially populate seeked path with the recovered path
|
|
// to be used in trie traversal
|
|
if recoveredPath != nil {
|
|
iterSeekedPath = append(iterSeekedPath, recoveredPath...)
|
|
}
|
|
|
|
// if the iterator being tracked is a PrefixBoundIterator, capture it's end path
|
|
// to be used in trie traversal
|
|
var endPath []byte
|
|
if boundedIter, ok := it.(*iter.PrefixBoundIterator); ok {
|
|
endPath = boundedIter.EndPath
|
|
}
|
|
|
|
ret = &trackedIter{it, tr, iterSeekedPath, endPath}
|
|
tr.startChan <- ret
|
|
return
|
|
}
|
|
|
|
// explicitly stops an iterator
|
|
func (tr *iteratorTracker) stopIter(it *trackedIter) {
|
|
tr.stopChan <- it
|
|
}
|
|
|
|
// dumps iterator path and bounds to a text file so it can be restored later
|
|
func (tr *iteratorTracker) dump() error {
|
|
log.Debug("Dumping recovery state to: ", tr.recoveryFile)
|
|
var rows [][]string
|
|
for it := range tr.started {
|
|
var startPath []byte
|
|
var endPath []byte
|
|
if impl, ok := it.NodeIterator.(*iter.PrefixBoundIterator); ok {
|
|
// if the iterator being tracked is a PrefixBoundIterator,
|
|
// initialize start and end paths with its bounds
|
|
startPath = impl.StartPath
|
|
endPath = impl.EndPath
|
|
}
|
|
|
|
// if seeked path and iterator path are non-empty, use iterator's path as startpath
|
|
if !bytes.Equal(it.seekedPath, []byte{}) && !bytes.Equal(it.Path(), []byte{}) {
|
|
startPath = it.Path()
|
|
}
|
|
|
|
rows = append(rows, []string{
|
|
fmt.Sprintf("%x", startPath),
|
|
fmt.Sprintf("%x", endPath),
|
|
fmt.Sprintf("%x", it.seekedPath),
|
|
})
|
|
}
|
|
|
|
file, err := os.Create(tr.recoveryFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer file.Close()
|
|
out := csv.NewWriter(file)
|
|
|
|
return out.WriteAll(rows)
|
|
}
|
|
|
|
// attempts to read iterator state from file
|
|
// if file doesn't exist, returns an empty slice with no error
|
|
func (tr *iteratorTracker) restore(tree state.Trie) ([]trie.NodeIterator, error) {
|
|
file, err := os.Open(tr.recoveryFile)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
log.Debug("Restoring recovery state from: ", tr.recoveryFile)
|
|
defer file.Close()
|
|
in := csv.NewReader(file)
|
|
in.FieldsPerRecord = 3
|
|
rows, err := in.ReadAll()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var ret []trie.NodeIterator
|
|
for _, row := range rows {
|
|
// pick up where each interval left off
|
|
var startPath []byte
|
|
var endPath []byte
|
|
var recoveredPath []byte
|
|
|
|
if len(row[0]) != 0 {
|
|
if _, err = fmt.Sscanf(row[0], "%x", &startPath); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if len(row[1]) != 0 {
|
|
if _, err = fmt.Sscanf(row[1], "%x", &endPath); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if len(row[2]) != 0 {
|
|
if _, err = fmt.Sscanf(row[2], "%x", &recoveredPath); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// force the lower bound path to an even length
|
|
// (required by HexToKeyBytes())
|
|
if len(startPath)&0b1 == 1 {
|
|
// decrement first to avoid skipped nodes
|
|
decrementPath(startPath)
|
|
startPath = append(startPath, 0)
|
|
}
|
|
|
|
it := iter.NewPrefixBoundIterator(tree.NodeIterator(iter.HexToKeyBytes(startPath)), startPath, endPath)
|
|
ret = append(ret, tr.tracked(it, recoveredPath))
|
|
}
|
|
return ret, nil
|
|
}
|
|
|
|
func (tr *iteratorTracker) haltAndDump() error {
|
|
tr.running = false
|
|
|
|
// drain any pending iterators
|
|
close(tr.startChan)
|
|
for start := range tr.startChan {
|
|
tr.started[start] = struct{}{}
|
|
}
|
|
close(tr.stopChan)
|
|
for stop := range tr.stopChan {
|
|
tr.stopped = append(tr.stopped, stop)
|
|
}
|
|
|
|
for _, stop := range tr.stopped {
|
|
delete(tr.started, stop)
|
|
}
|
|
|
|
if len(tr.started) == 0 {
|
|
// if the tracker state is empty, erase any existing recovery file
|
|
err := os.Remove(tr.recoveryFile)
|
|
if os.IsNotExist(err) {
|
|
err = nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
return tr.dump()
|
|
}
|