ipld-eth-state-snapshot/pkg/snapshot/service.go

178 lines
5.5 KiB
Go
Raw Normal View History

2020-07-01 18:44:59 +00:00
// Copyright © 2020 Vulcanize, Inc
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
"context"
2020-07-01 18:44:59 +00:00
"fmt"
"math/big"
2023-09-25 10:37:00 +00:00
"os"
"os/signal"
2023-08-04 12:36:56 +00:00
"sync"
2023-09-25 10:37:00 +00:00
"syscall"
2020-07-01 18:44:59 +00:00
2023-09-25 10:37:00 +00:00
"github.com/cerc-io/ipld-eth-state-snapshot/pkg/prom"
2023-08-04 12:36:56 +00:00
statediff "github.com/cerc-io/plugeth-statediff"
"github.com/cerc-io/plugeth-statediff/adapt"
"github.com/cerc-io/plugeth-statediff/indexer"
"github.com/cerc-io/plugeth-statediff/types"
2020-07-01 18:44:59 +00:00
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/rlp"
2022-03-09 13:37:33 +00:00
log "github.com/sirupsen/logrus"
2020-07-01 18:44:59 +00:00
)
var (
emptyNode, _ = rlp.EncodeToBytes(&[]byte{})
emptyCodeHash = crypto.Keccak256([]byte{})
2020-07-01 18:44:59 +00:00
emptyContractRoot = crypto.Keccak256Hash(emptyNode)
defaultBatchSize = uint(100)
2020-07-01 18:44:59 +00:00
)
2021-12-14 06:50:19 +00:00
// Service holds ethDB and stateDB to read data from lvldb and Publisher
// to publish trie in postgres DB.
2020-07-01 18:44:59 +00:00
type Service struct {
2023-08-04 12:36:56 +00:00
ethDB ethdb.Database
stateDB state.Database
indexer indexer.Indexer
maxBatchSize uint
recoveryFile string
2020-07-01 18:44:59 +00:00
}
2022-01-11 05:37:27 +00:00
func NewLevelDB(con *EthConfig) (ethdb.Database, error) {
2023-08-04 12:36:56 +00:00
kvdb, err := rawdb.NewLevelDBDatabase(con.LevelDBPath, 1024, 256, "ipld-eth-state-snapshot", true)
if err != nil {
return nil, fmt.Errorf("failed to connect LevelDB: %s", err)
}
edb, err := rawdb.NewDatabaseWithFreezer(kvdb, con.AncientDBPath, "ipld-eth-state-snapshot", true)
if err != nil {
2023-08-04 12:36:56 +00:00
return nil, fmt.Errorf("failed to connect LevelDB freezer: %s", err)
}
return edb, nil
2022-01-11 05:37:27 +00:00
}
2022-01-11 05:37:27 +00:00
// NewSnapshotService creates Service.
2023-08-27 09:49:46 +00:00
func NewSnapshotService(edb ethdb.Database, indexer indexer.Indexer, recoveryFile string) (*Service, error) {
2020-07-01 18:44:59 +00:00
return &Service{
2023-08-04 12:36:56 +00:00
ethDB: edb,
stateDB: state.NewDatabase(edb),
indexer: indexer,
maxBatchSize: defaultBatchSize,
recoveryFile: recoveryFile,
2020-07-01 18:44:59 +00:00
}, nil
}
2020-08-20 10:23:36 +00:00
type SnapshotParams struct {
2023-08-04 12:36:56 +00:00
WatchedAddresses []common.Address
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
Height uint64
Workers uint
2020-08-20 10:23:36 +00:00
}
2020-08-23 04:38:31 +00:00
func (s *Service) CreateSnapshot(params SnapshotParams) error {
// extract header from lvldb and publish to PG-IPFS
// hold onto the headerID so that we can link the state nodes to this header
2020-08-23 04:38:31 +00:00
hash := rawdb.ReadCanonicalHash(s.ethDB, params.Height)
header := rawdb.ReadHeader(s.ethDB, hash, params.Height)
if header == nil {
2020-08-23 04:38:31 +00:00
return fmt.Errorf("unable to read canonical header at height %d", params.Height)
}
2023-09-25 10:37:00 +00:00
log.WithField("height", params.Height).WithField("hash", hash).Info("Creating snapshot")
2023-08-04 12:36:56 +00:00
// Context for snapshot work
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
ctx, cancelCtx := context.WithCancel(context.Background())
2023-08-04 12:36:56 +00:00
defer cancelCtx()
2023-09-25 10:37:00 +00:00
// Cancel context on receiving a signal. On cancellation, all tracked iterators complete
// processing of their current node before stopping.
captureSignal(cancelCtx)
2023-08-04 12:36:56 +00:00
var err error
tx := s.indexer.BeginTx(header.Number, ctx)
defer tx.RollbackOnFailure(err)
2023-08-04 12:36:56 +00:00
var headerid string
headerid, err = s.indexer.PushHeader(tx, header, big.NewInt(0), big.NewInt(0))
if err != nil {
return err
}
2022-06-08 12:08:17 +00:00
2023-09-25 10:37:00 +00:00
tr := prom.NewTracker(s.recoveryFile, params.Workers)
defer func() {
2023-09-25 10:37:00 +00:00
err := tr.CloseAndSave()
if err != nil {
2022-05-26 10:20:42 +00:00
log.Errorf("failed to write recovery file: %v", err)
}
}()
2023-08-04 12:36:56 +00:00
var nodeMtx, ipldMtx sync.Mutex
nodeSink := func(node types.StateLeafNode) error {
nodeMtx.Lock()
defer nodeMtx.Unlock()
return s.indexer.PushStateNode(tx, node, headerid)
2020-08-20 10:23:36 +00:00
}
2023-08-04 12:36:56 +00:00
ipldSink := func(c types.IPLD) error {
ipldMtx.Lock()
defer ipldMtx.Unlock()
return s.indexer.PushIPLD(tx, c)
2020-07-16 15:02:16 +00:00
}
2020-08-20 10:23:36 +00:00
2023-09-25 10:37:00 +00:00
// Build a diff against the zero hash (empty trie) to get a full snapshot
2023-08-04 12:36:56 +00:00
sdargs := statediff.Args{
NewStateRoot: header.Root,
BlockHash: header.Hash(),
BlockNumber: header.Number,
2020-07-01 18:44:59 +00:00
}
2023-08-04 12:36:56 +00:00
sdparams := statediff.Params{
WatchedAddresses: params.WatchedAddresses,
2022-01-11 00:06:29 +00:00
}
2023-08-04 12:36:56 +00:00
sdparams.ComputeWatchedAddressesLeafPaths()
builder := statediff.NewBuilder(adapt.GethStateView(s.stateDB))
2023-08-27 09:49:46 +00:00
builder.SetSubtrieWorkers(params.Workers)
2023-09-25 10:37:00 +00:00
if err = builder.WriteStateDiffTracked(sdargs, sdparams, nodeSink, ipldSink, tr); err != nil {
2023-05-16 14:22:15 +00:00
return err
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
}
2023-08-04 12:36:56 +00:00
if err = tx.Submit(); err != nil {
return fmt.Errorf("batch transaction submission failed: %w", err)
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
}
2023-08-04 12:36:56 +00:00
return err
2023-04-12 18:07:42 +00:00
}
2023-08-04 12:36:56 +00:00
// CreateLatestSnapshot snapshot at head (ignores height param)
func (s *Service) CreateLatestSnapshot(workers uint, watchedAddresses []common.Address) error {
log.Info("Creating snapshot at head")
hash := rawdb.ReadHeadHeaderHash(s.ethDB)
height := rawdb.ReadHeaderNumber(s.ethDB, hash)
if height == nil {
2023-09-25 11:06:22 +00:00
return fmt.Errorf("unable to read header height for header hash %s", hash)
2023-04-12 18:07:42 +00:00
}
2023-08-04 12:36:56 +00:00
return s.CreateSnapshot(SnapshotParams{Height: *height, Workers: workers, WatchedAddresses: watchedAddresses})
2020-07-01 18:44:59 +00:00
}
2023-09-25 10:37:00 +00:00
// captureSignal registers for SIGINT and SIGTERM and starts a goroutine that
// waits for the first such signal, logs it, and then invokes cb once. The
// registration is never removed and the goroutine lives until a signal
// arrives.
func captureSignal(cb func()) {
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)

	waitAndNotify := func() {
		received := <-signals
		log.Errorf("Signal received (%v), stopping", received)
		cb()
	}
	go waitAndNotify()
}