ipld-eth-state-snapshot/pkg/snapshot/config.go

176 lines
5.1 KiB
Go
Raw Normal View History

2020-07-01 18:44:59 +00:00
// Copyright © 2020 Vulcanize, Inc
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"fmt"
"time"
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
"github.com/ethereum/go-ethereum/common"
"github.com/sirupsen/logrus"
"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"
2021-12-14 06:50:19 +00:00
ethNode "github.com/ethereum/go-ethereum/statediff/indexer/node"
2020-07-01 18:44:59 +00:00
"github.com/spf13/viper"
)
// SnapshotMode names the output method used for snapshot data.
type SnapshotMode string

// Supported snapshot output modes.
const (
	// PgSnapshot selects the Postgres output section (DBConfig).
	PgSnapshot = SnapshotMode("postgres")
	// FileSnapshot selects the file output section (FileConfig).
	FileSnapshot = SnapshotMode("file")
)

// defaultOutputDir is the directory FileConfig.Init falls back to when no
// output directory is configured.
const defaultOutputDir = "./snapshot_output"
2022-01-26 17:30:28 +00:00
// Config contains params for both databases the service uses
type Config struct {
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
Eth *EthConfig
DB *DBConfig
File *FileConfig
Service *ServiceConfig
}
// EthConfig is config parameters for the chain.
type EthConfig struct {
LevelDBPath string
AncientDBPath string
NodeInfo ethNode.Info
2022-01-26 17:30:28 +00:00
}
2022-01-11 00:59:26 +00:00
// DBConfig is config parameters for DB.
type DBConfig struct {
URI string
ConnConfig postgres.Config
2022-01-11 00:59:26 +00:00
}
// FileConfig holds the settings for file output.
type FileConfig struct {
	OutputDir string // populated from FILE_OUTPUT_DIR_TOML; defaultOutputDir when unset
}
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
// ServiceConfig holds service-level options.
type ServiceConfig struct {
	// AllowedAccounts is the set of account addresses read from
	// SNAPSHOT_ACCOUNTS_TOML. It is left nil when no accounts are configured.
	AllowedAccounts map[common.Address]struct{}
}
func NewConfig(mode SnapshotMode) (*Config, error) {
ret := &Config{
&EthConfig{},
&DBConfig{},
&FileConfig{},
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
&ServiceConfig{},
}
return ret, ret.Init(mode)
2020-07-01 18:44:59 +00:00
}
func NewInPlaceSnapshotConfig() *Config {
ret := &Config{
&EthConfig{},
&DBConfig{},
&FileConfig{},
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
&ServiceConfig{},
}
ret.DB.Init()
return ret
}
2021-12-14 06:50:19 +00:00
// Init Initialises config
func (c *Config) Init(mode SnapshotMode) error {
2022-04-04 19:35:18 +00:00
viper.BindEnv(ETH_NODE_ID_TOML, ETH_NODE_ID)
viper.BindEnv(ETH_CLIENT_NAME_TOML, ETH_CLIENT_NAME)
viper.BindEnv(ETH_GENESIS_BLOCK_TOML, ETH_GENESIS_BLOCK)
viper.BindEnv(ETH_NETWORK_ID_TOML, ETH_NETWORK_ID)
viper.BindEnv(ETH_CHAIN_ID_TOML, ETH_CHAIN_ID)
c.Eth.NodeInfo = ethNode.Info{
2022-04-04 19:35:18 +00:00
ID: viper.GetString(ETH_NODE_ID_TOML),
ClientName: viper.GetString(ETH_CLIENT_NAME_TOML),
GenesisBlock: viper.GetString(ETH_GENESIS_BLOCK_TOML),
NetworkID: viper.GetString(ETH_NETWORK_ID_TOML),
ChainID: viper.GetUint64(ETH_CHAIN_ID_TOML),
2020-07-01 18:44:59 +00:00
}
2022-01-11 00:59:26 +00:00
2022-04-04 19:35:18 +00:00
viper.BindEnv(ANCIENT_DB_PATH_TOML, ANCIENT_DB_PATH)
viper.BindEnv(LVL_DB_PATH_TOML, LVL_DB_PATH)
2022-01-11 00:59:26 +00:00
2022-04-04 19:35:18 +00:00
c.Eth.AncientDBPath = viper.GetString(ANCIENT_DB_PATH_TOML)
c.Eth.LevelDBPath = viper.GetString(LVL_DB_PATH_TOML)
switch mode {
case FileSnapshot:
c.File.Init()
case PgSnapshot:
c.DB.Init()
default:
return fmt.Errorf("no output mode specified")
}
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
return c.Service.Init()
2020-07-01 18:44:59 +00:00
}
func (c *DBConfig) Init() {
2022-04-04 19:35:18 +00:00
viper.BindEnv(DATABASE_NAME_TOML, DATABASE_NAME)
viper.BindEnv(DATABASE_HOSTNAME_TOML, DATABASE_HOSTNAME)
viper.BindEnv(DATABASE_PORT_TOML, DATABASE_PORT)
viper.BindEnv(DATABASE_USER_TOML, DATABASE_USER)
viper.BindEnv(DATABASE_PASSWORD_TOML, DATABASE_PASSWORD)
viper.BindEnv(DATABASE_MAX_IDLE_CONNECTIONS_TOML, DATABASE_MAX_IDLE_CONNECTIONS)
viper.BindEnv(DATABASE_MAX_OPEN_CONNECTIONS_TOML, DATABASE_MAX_OPEN_CONNECTIONS)
viper.BindEnv(DATABASE_MAX_CONN_LIFETIME_TOML, DATABASE_MAX_CONN_LIFETIME)
dbParams := postgres.Config{}
// DB params
2022-04-04 19:35:18 +00:00
dbParams.DatabaseName = viper.GetString(DATABASE_NAME_TOML)
dbParams.Hostname = viper.GetString(DATABASE_HOSTNAME_TOML)
dbParams.Port = viper.GetInt(DATABASE_PORT_TOML)
dbParams.Username = viper.GetString(DATABASE_USER_TOML)
dbParams.Password = viper.GetString(DATABASE_PASSWORD_TOML)
2022-01-11 00:59:26 +00:00
// Connection config
2022-04-04 19:35:18 +00:00
dbParams.MaxIdle = viper.GetInt(DATABASE_MAX_IDLE_CONNECTIONS_TOML)
dbParams.MaxConns = viper.GetInt(DATABASE_MAX_OPEN_CONNECTIONS_TOML)
dbParams.MaxConnLifetime = time.Duration(viper.GetInt(DATABASE_MAX_CONN_LIFETIME_TOML)) * time.Second
c.ConnConfig = dbParams
c.URI = dbParams.DbConnectionString()
}
func (c *FileConfig) Init() error {
2022-04-04 19:35:18 +00:00
viper.BindEnv(FILE_OUTPUT_DIR_TOML, FILE_OUTPUT_DIR)
c.OutputDir = viper.GetString(FILE_OUTPUT_DIR_TOML)
if c.OutputDir == "" {
logrus.Infof("no output directory set, using default: %s", defaultOutputDir)
c.OutputDir = defaultOutputDir
}
return nil
}
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com>
2022-08-03 11:35:04 +00:00
// Init binds the snapshot-accounts environment variable to its config key and
// loads the configured account addresses into c.AllowedAccounts.
//
// When no accounts are configured, c.AllowedAccounts is left untouched and an
// informational message is logged. It returns an error when the configured
// value cannot be decoded into a list of strings.
func (c *ServiceConfig) Init() error {
	viper.BindEnv(SNAPSHOT_ACCOUNTS_TOML, SNAPSHOT_ACCOUNTS)

	var allowedAccounts []string
	// Surface decoding failures: ignoring them would silently turn a
	// malformed account list into a snapshot of the entire trie.
	if err := viper.UnmarshalKey(SNAPSHOT_ACCOUNTS_TOML, &allowedAccounts); err != nil {
		return fmt.Errorf("reading %s: %w", SNAPSHOT_ACCOUNTS_TOML, err)
	}

	if len(allowedAccounts) == 0 {
		logrus.Infof("no snapshot addresses specified, will perform snapshot of entire trie(s)")
		return nil
	}

	c.AllowedAccounts = make(map[common.Address]struct{}, len(allowedAccounts))
	for _, allowedAccount := range allowedAccounts {
		// NOTE(review): common.HexToAddress does not appear to report
		// malformed input — consider validating with common.IsHexAddress.
		c.AllowedAccounts[common.HexToAddress(allowedAccount)] = struct{}{}
	}
	return nil
}