ipld-eth-state-snapshot/pkg/snapshot/config.go

182 lines
5.5 KiB
Go
Raw Normal View History

2020-07-01 18:44:59 +00:00
// Copyright © 2020 Vulcanize, Inc
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"fmt"
"time"
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
"github.com/ethereum/go-ethereum/common"
"github.com/sirupsen/logrus"
2023-08-04 12:36:56 +00:00
"github.com/cerc-io/plugeth-statediff/indexer/database/file"
"github.com/cerc-io/plugeth-statediff/indexer/database/sql/postgres"
ethNode "github.com/cerc-io/plugeth-statediff/indexer/node"
2020-07-01 18:44:59 +00:00
"github.com/spf13/viper"
)
// SnapshotMode specifies the snapshot data output method
type SnapshotMode string
const (
PgSnapshot SnapshotMode = "postgres"
FileSnapshot SnapshotMode = "file"
defaultOutputDir = "./snapshot_output"
)
2022-01-26 17:30:28 +00:00
// Config contains params for both databases the service uses
type Config struct {
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
Eth *EthConfig
DB *DBConfig
File *FileConfig
Service *ServiceConfig
}
// EthConfig is config parameters for the chain.
type EthConfig struct {
LevelDBPath string
AncientDBPath string
NodeInfo ethNode.Info
2022-01-26 17:30:28 +00:00
}
2023-08-04 12:36:56 +00:00
// DBConfig contains options for DB output mode.
type DBConfig = postgres.Config
2022-01-11 00:59:26 +00:00
2023-08-04 12:36:56 +00:00
// FileConfig contains options for file output mode. Note that this service currently only supports
// CSV output, and does not record watched addresses, so not all fields are used.
type FileConfig = file.Config
2022-01-11 00:59:26 +00:00
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
type ServiceConfig struct {
2023-08-04 12:36:56 +00:00
AllowedAccounts []common.Address
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
}
func NewConfig(mode SnapshotMode) (*Config, error) {
ret := &Config{
&EthConfig{},
&DBConfig{},
&FileConfig{},
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
&ServiceConfig{},
}
return ret, ret.Init(mode)
2020-07-01 18:44:59 +00:00
}
func NewInPlaceSnapshotConfig() *Config {
ret := &Config{
&EthConfig{},
&DBConfig{},
&FileConfig{},
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
&ServiceConfig{},
}
2023-08-04 12:36:56 +00:00
InitDB(ret.DB)
return ret
}
2021-12-14 06:50:19 +00:00
// Init Initialises config
func (c *Config) Init(mode SnapshotMode) error {
2023-05-09 15:33:32 +00:00
viper.BindEnv(LOGRUS_FILE_TOML, LOGRUS_FILE)
2022-04-04 19:35:18 +00:00
viper.BindEnv(ETH_NODE_ID_TOML, ETH_NODE_ID)
viper.BindEnv(ETH_CLIENT_NAME_TOML, ETH_CLIENT_NAME)
viper.BindEnv(ETH_GENESIS_BLOCK_TOML, ETH_GENESIS_BLOCK)
viper.BindEnv(ETH_NETWORK_ID_TOML, ETH_NETWORK_ID)
viper.BindEnv(ETH_CHAIN_ID_TOML, ETH_CHAIN_ID)
c.Eth.NodeInfo = ethNode.Info{
2022-04-04 19:35:18 +00:00
ID: viper.GetString(ETH_NODE_ID_TOML),
ClientName: viper.GetString(ETH_CLIENT_NAME_TOML),
GenesisBlock: viper.GetString(ETH_GENESIS_BLOCK_TOML),
NetworkID: viper.GetString(ETH_NETWORK_ID_TOML),
ChainID: viper.GetUint64(ETH_CHAIN_ID_TOML),
2020-07-01 18:44:59 +00:00
}
2022-01-11 00:59:26 +00:00
2022-04-04 19:35:18 +00:00
viper.BindEnv(ANCIENT_DB_PATH_TOML, ANCIENT_DB_PATH)
viper.BindEnv(LVL_DB_PATH_TOML, LVL_DB_PATH)
2022-01-11 00:59:26 +00:00
2022-04-04 19:35:18 +00:00
c.Eth.AncientDBPath = viper.GetString(ANCIENT_DB_PATH_TOML)
c.Eth.LevelDBPath = viper.GetString(LVL_DB_PATH_TOML)
switch mode {
case FileSnapshot:
2023-08-04 12:36:56 +00:00
InitFile(c.File)
case PgSnapshot:
2023-08-04 12:36:56 +00:00
InitDB(c.DB)
default:
return fmt.Errorf("no output mode specified")
}
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
return c.Service.Init()
2020-07-01 18:44:59 +00:00
}
2023-08-04 12:36:56 +00:00
func InitDB(c *DBConfig) {
2022-04-04 19:35:18 +00:00
viper.BindEnv(DATABASE_NAME_TOML, DATABASE_NAME)
viper.BindEnv(DATABASE_HOSTNAME_TOML, DATABASE_HOSTNAME)
viper.BindEnv(DATABASE_PORT_TOML, DATABASE_PORT)
viper.BindEnv(DATABASE_USER_TOML, DATABASE_USER)
viper.BindEnv(DATABASE_PASSWORD_TOML, DATABASE_PASSWORD)
viper.BindEnv(DATABASE_MAX_IDLE_CONNECTIONS_TOML, DATABASE_MAX_IDLE_CONNECTIONS)
viper.BindEnv(DATABASE_MAX_OPEN_CONNECTIONS_TOML, DATABASE_MAX_OPEN_CONNECTIONS)
viper.BindEnv(DATABASE_MAX_CONN_LIFETIME_TOML, DATABASE_MAX_CONN_LIFETIME)
// DB params
2023-08-04 12:36:56 +00:00
c.DatabaseName = viper.GetString(DATABASE_NAME_TOML)
c.Hostname = viper.GetString(DATABASE_HOSTNAME_TOML)
c.Port = viper.GetInt(DATABASE_PORT_TOML)
c.Username = viper.GetString(DATABASE_USER_TOML)
c.Password = viper.GetString(DATABASE_PASSWORD_TOML)
2022-01-11 00:59:26 +00:00
// Connection config
2023-08-04 12:36:56 +00:00
c.MaxIdle = viper.GetInt(DATABASE_MAX_IDLE_CONNECTIONS_TOML)
c.MaxConns = viper.GetInt(DATABASE_MAX_OPEN_CONNECTIONS_TOML)
c.MaxConnLifetime = time.Duration(viper.GetInt(DATABASE_MAX_CONN_LIFETIME_TOML)) * time.Second
2023-08-04 12:36:56 +00:00
c.Driver = postgres.PGX
}
2023-08-04 12:36:56 +00:00
func InitFile(c *FileConfig) error {
2022-04-04 19:35:18 +00:00
viper.BindEnv(FILE_OUTPUT_DIR_TOML, FILE_OUTPUT_DIR)
c.OutputDir = viper.GetString(FILE_OUTPUT_DIR_TOML)
if c.OutputDir == "" {
logrus.Infof("no output directory set, using default: %s", defaultOutputDir)
c.OutputDir = defaultOutputDir
}
2023-08-04 12:36:56 +00:00
// Only support CSV for now
c.Mode = file.CSV
return nil
}
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
func (c *ServiceConfig) Init() error {
2023-05-09 15:33:32 +00:00
viper.BindEnv(SNAPSHOT_BLOCK_HEIGHT_TOML, SNAPSHOT_BLOCK_HEIGHT)
viper.BindEnv(SNAPSHOT_MODE_TOML, SNAPSHOT_MODE)
viper.BindEnv(SNAPSHOT_WORKERS_TOML, SNAPSHOT_WORKERS)
viper.BindEnv(PROM_HTTP_TOML, PROM_HTTP)
viper.BindEnv(PROM_HTTP_ADDR_TOML, PROM_HTTP_ADDR)
viper.BindEnv(PROM_METRICS_TOML, PROM_METRICS)
2023-05-09 15:33:32 +00:00
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
viper.BindEnv(SNAPSHOT_ACCOUNTS_TOML, SNAPSHOT_ACCOUNTS)
var allowedAccounts []string
viper.UnmarshalKey(SNAPSHOT_ACCOUNTS_TOML, &allowedAccounts)
accountsLen := len(allowedAccounts)
if accountsLen != 0 {
2023-08-04 12:36:56 +00:00
c.AllowedAccounts = make([]common.Address, 0, accountsLen)
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
for _, allowedAccount := range allowedAccounts {
2023-08-04 12:36:56 +00:00
c.AllowedAccounts = append(c.AllowedAccounts, common.HexToAddress(allowedAccount))
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
}
} else {
logrus.Infof("no snapshot addresses specified, will perform snapshot of entire trie(s)")
}
return nil
}