// This package provides a way to track multiple concurrently running trie iterators, save their
// state to a file on failures or interruptions, and restore them at the positions where they
// stopped.
//
// Example usage:
//
//	tr := tracker.New("recovery.txt", 100)
//	// Ensure the tracker is closed and saves its state
//	defer tr.CloseAndSave()
//
//	// Iterate over the trie, from one or multiple threads
//	it := tr.Tracked(tree.NodeIterator(nil))
//	for it.Next(true) {
//		// ... do work that could fail or be interrupted
//	}
//
//	// Later, restore the iterators
//	tr := tracker.New("recovery.txt", 100)
//	defer tr.CloseAndSave()
//
//	its, _, err := tr.Restore(tree.NodeIterator)
//	for _, it := range its {
//		// ... resume traversal
//	}
package tracker

import (
	"encoding/csv"
	"fmt"
	"os"
	"sync"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/trie"

	iter "github.com/cerc-io/eth-iterator-utils"
)

// IteratorTracker exposes a minimal interface to register and consume iterators.
type IteratorTracker interface {
	Restore(iter.IteratorConstructor) ([]trie.NodeIterator, []trie.NodeIterator, error)
	Tracked(trie.NodeIterator) trie.NodeIterator
}

var _ IteratorTracker = &Tracker{}

// Tracker is a trie iterator tracker which saves state to and restores it from a file.
type Tracker struct {
	*TrackerImpl
}

// New creates a tracker which saves state to the given file. bufsize sets the capacity of the
// internal tracking channels; a bufsize smaller than the number of concurrently tracked
// iterators can lead to deadlock.
func New(file string, bufsize uint) *Tracker {
	return &Tracker{NewImpl(file, bufsize)}
}
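
// The bufsize warning above follows from the implementation: each tracked iterator performs one
// channel send when it is registered (Tracked) and one when it finishes (Next returning false),
// and both channels are buffered to bufsize. A minimal concurrent sketch (hypothetical caller
// code; makeIterator, startKeys, and workers are illustrative names, not part of this package):
//
//	workers := 8
//	tr := tracker.New("recovery.txt", uint(workers))
//	defer tr.CloseAndSave()
//
//	var wg sync.WaitGroup
//	for i := 0; i < workers; i++ {
//		wg.Add(1)
//		go func(i int) {
//			defer wg.Done()
//			it, _ := makeIterator(startKeys[i]) // one iterator per key range
//			tracked := tr.Tracked(it)
//			for tracked.Next(true) {
//				// ... process the current node
//			}
//		}(i)
//	}
//	wg.Wait()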

// Restore attempts to read iterator state from the recovery file.
// Returns:
//   - the tracked iterators
//   - the corresponding iterators originally returned by the constructor
//
// If the file doesn't exist, returns empty slices with no error.
// Restored iterators are constructed in the same order they appear in the returned slices.
func (tr *Tracker) Restore(makeIterator iter.IteratorConstructor) (
	[]trie.NodeIterator, []trie.NodeIterator, error,
) {
	its, bases, err := tr.TrackerImpl.Restore(makeIterator)
	if err != nil {
		return nil, nil, err
	}

	var ret []trie.NodeIterator
	for _, it := range its {
		ret = append(ret, it)
	}
	return ret, bases, nil
}
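
// A minimal restore-and-resume sketch (hypothetical caller code; makeIterator is an assumed
// iter.IteratorConstructor and error handling is abbreviated). The first slice is what to
// iterate; the second holds the corresponding unwrapped iterators from the constructor:
//
//	its, bases, err := tr.Restore(makeIterator)
//	if err != nil {
//		return err
//	}
//	for i, it := range its {
//		_ = bases[i] // underlying iterator, if direct access is needed
//		for it.Next(true) {
//			// ... resume work
//		}
//	}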

// Tracked wraps an iterator in a tracked iterator. It must not be called concurrently with
// CloseAndSave, or after the tracker has been closed.
func (tr *Tracker) Tracked(it trie.NodeIterator) trie.NodeIterator {
	return tr.TrackerImpl.Tracked(it)
}
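
// NewImpl creates the underlying tracker state wrapped by Tracker; New is the usual entry point.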
func NewImpl(file string, bufsize uint) *TrackerImpl {
	return &TrackerImpl{
		recoveryFile: file,
		startChan:    make(chan *Iterator, bufsize),
		stopChan:     make(chan *Iterator, bufsize),
		started:      map[*Iterator]struct{}{},
		running:      true,
	}
}
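
// TrackerImpl holds the tracker state: Tracked registers iterators on startChan, finished
// iterators report on stopChan, and CloseAndSave drains both to determine which iterators are
// still in flight before Save writes them to the recovery file.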
type TrackerImpl struct {
	recoveryFile string

	startChan chan *Iterator
	stopChan  chan *Iterator
	started   map[*Iterator]struct{}
	stopped   []*Iterator
	running   bool
	sync.RWMutex // guards closing of the tracker
}
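
// Iterator wraps a trie.NodeIterator and reports back to its tracker when iteration stops.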
type Iterator struct {
	trie.NodeIterator
	tracker *TrackerImpl
}
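
// Tracked registers an iterator with the tracker and returns the wrapped, tracked iterator.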
func (tr *TrackerImpl) Tracked(it trie.NodeIterator) *Iterator {
	ret := &Iterator{it, tr}
	tr.startChan <- ret
	return ret
}

// Save dumps iterator path and bounds to a text file so it can be restored later.
func (tr *TrackerImpl) Save() error {
	log.Debug("Saving recovery state", "to", tr.recoveryFile)

	// if the tracker state is empty, erase any existing recovery file
	if len(tr.started) == 0 {
		return tr.removeRecoveryFile()
	}

	var rows [][]string
	for it := range tr.started {
		_, endPath := it.Bounds()
		rows = append(rows, []string{
			fmt.Sprintf("%x", it.Path()),
			fmt.Sprintf("%x", endPath),
		})
	}

	file, err := os.Create(tr.recoveryFile)
	if err != nil {
		return err
	}
	defer file.Close()
	out := csv.NewWriter(file)

	return out.WriteAll(rows)
}
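
// The recovery file is plain CSV, one row per in-flight iterator: the iterator's current path
// and its upper bound, each hex-encoded (either field may be empty, e.g. for the root or no
// bound). An illustrative file (made-up values) might look like:
//
//	030402,0c
//	0d,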

func (tr *TrackerImpl) removeRecoveryFile() error {
	err := os.Remove(tr.recoveryFile)
	if os.IsNotExist(err) {
		err = nil
	}
	return err
}

func (tr *TrackerImpl) Restore(makeIterator iter.IteratorConstructor) (
	[]*Iterator, []trie.NodeIterator, error,
) {
	file, err := os.Open(tr.recoveryFile)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}
	defer file.Close()
	log.Debug("Restoring recovery state", "from", tr.recoveryFile)

	in := csv.NewReader(file)
	in.FieldsPerRecord = 2
	rows, err := in.ReadAll()
	if err != nil {
		return nil, nil, err
	}

	var wrapped []*Iterator
	var base []trie.NodeIterator
	for _, row := range rows {
		// pick up where each recovered iterator left off
		var recoveredPath []byte
		var endPath []byte

		if len(row[0]) != 0 {
			if _, err = fmt.Sscanf(row[0], "%x", &recoveredPath); err != nil {
				return nil, nil, err
			}
		}
		if len(row[1]) != 0 {
			if _, err = fmt.Sscanf(row[1], "%x", &endPath); err != nil {
				return nil, nil, err
			}
		}

		// force the lower bound path to an even length (required by NodeIterator constructor)
		if len(recoveredPath)&1 == 1 {
			// to avoid skipped nodes, we must rewind by one index
			recoveredPath = rewindPath(recoveredPath)
		}
		it, err := makeIterator(iter.HexToKeyBytes(recoveredPath))
		if err != nil {
			return nil, nil, err
		}
		boundIt := iter.NewPrefixBoundIterator(it, endPath)
		wrapped = append(wrapped, tr.Tracked(boundIt))
		base = append(base, it)
	}

	return wrapped, base, tr.removeRecoveryFile()
}

// CloseAndSave stops all tracked iterators and dumps their state to a file.
// This closes the tracker, so adding a new iterator afterwards will fail.
// A new Tracker must be constructed in order to restore state.
func (tr *TrackerImpl) CloseAndSave() error {
	tr.Lock()
	tr.running = false
	close(tr.stopChan)
	tr.Unlock()
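
	// At this point no iterator can send on stopChan: Next holds the read lock while it checks
	// running and sends, and running was set to false under the write lock before stopChan was
	// closed. startChan is not protected by the lock, which is why Tracked must not race with
	// CloseAndSave.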

	// drain any pending iterators
	close(tr.startChan)
	for start := range tr.startChan {
		tr.started[start] = struct{}{}
	}
	for stop := range tr.stopChan {
		tr.stopped = append(tr.stopped, stop)
	}
	for _, stop := range tr.stopped {
		delete(tr.started, stop)
	}

	return tr.Save()
}

// Next advances the iterator, notifying its owning tracker when it finishes.
func (it *Iterator) Next(descend bool) bool {
	ret := it.NodeIterator.Next(descend)

	if !ret {
		it.tracker.RLock()
		defer it.tracker.RUnlock()
		if it.tracker.running {
			it.tracker.stopChan <- it
		} else {
			log.Error("Tracker was closed before iterator finished")
		}
	}
	return ret
}

// Bounds returns the bounds of the underlying PrefixBoundIterator, if any.
func (it *Iterator) Bounds() ([]byte, []byte) {
	if impl, ok := it.NodeIterator.(*iter.PrefixBoundIterator); ok {
		return impl.Bounds()
	}
	return nil, nil
}

// rewindPath returns the path, rewound to the previous (pre-order) node:
// If path is the root (empty) or a leaf path, it is returned unchanged.
// If the last byte of the path is zero, pops it (e.g. [1 0] => [1]).
// Otherwise, decrements it and pads with 0xF to 64 bytes (e.g. [1] => [0 f f f ...]).
// The passed slice is not modified.
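//
// For example (illustrative nibble paths):
//
//	rewindPath([]byte{0xc, 0x5, 0x0}) // => [c 5]             (trailing zero popped)
//	rewindPath([]byte{0xc, 0x5, 0x3}) // => [c 5 2 f f ... f] (decremented, padded to 64 nibbles)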
func rewindPath(path []byte) []byte {
	if len(path) == 0 || path[len(path)-1] == 0x10 {
		return path
	}
	if path[len(path)-1] == 0 {
		return path[:len(path)-1]
	}
	padded := make([]byte, 64)
	i := copy(padded, path)
	padded[len(path)-1]--
	for ; i < len(padded); i++ {
		padded[i] = 0xf
	}
	return padded
}