eth-ipfs-state-validator/pkg/validator.go

293 lines
9.4 KiB
Go
Raw Normal View History

2020-06-25 20:11:50 +00:00
// VulcanizeDB
// Copyright © 2020 Vulcanize
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package validator
import (
2022-08-08 23:28:13 +00:00
"bytes"
2022-08-24 19:09:46 +00:00
"context"
2022-08-08 23:28:13 +00:00
"fmt"
"os"
"os/signal"
"syscall"
"time"
2020-06-25 20:11:50 +00:00
"github.com/ethereum/go-ethereum/common"
2022-08-08 23:28:13 +00:00
"github.com/ethereum/go-ethereum/core/types"
2020-06-26 18:11:30 +00:00
"github.com/ethereum/go-ethereum/crypto"
2020-06-25 20:11:50 +00:00
"github.com/ethereum/go-ethereum/ethdb"
2022-08-08 23:28:13 +00:00
"github.com/ethereum/go-ethereum/rlp"
2024-04-15 11:42:58 +00:00
"github.com/ipfs/boxo/blockservice"
2020-06-25 20:11:50 +00:00
"github.com/jmoiron/sqlx"
"github.com/mailgun/groupcache/v2"
2022-08-24 19:09:46 +00:00
log "github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
2020-06-25 20:11:50 +00:00
iterutils "github.com/cerc-io/eth-iterator-utils"
"github.com/cerc-io/eth-iterator-utils/tracker"
2023-04-29 06:40:03 +00:00
ipfsethdb "github.com/cerc-io/ipfs-ethdb/v5"
pgipfsethdb "github.com/cerc-io/ipfs-ethdb/v5/postgres/v0"
"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/state"
"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie"
2024-04-15 11:42:58 +00:00
"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/triedb"
2020-06-25 20:11:50 +00:00
)
// Validator is used for validating Ethereum state and storage tries on PG-IPFS
type Validator struct {
kvs ethdb.KeyValueStore
2024-04-15 11:42:58 +00:00
trieDB *triedb.Database
2020-06-25 20:11:50 +00:00
stateDatabase state.Database
db *pgipfsethdb.Database
2022-08-08 23:28:13 +00:00
2022-08-24 19:09:46 +00:00
params Params
2020-06-25 20:11:50 +00:00
}
2022-08-24 19:09:46 +00:00
// Params configures how a Validator traverses a trie.
type Params struct {
	// Workers is the number of iterators the traversal is split into; each
	// iterator is walked in its own goroutine.
	Workers uint
	// RecoveryFormat is the format string used to build the recovery file
	// name; its %s verb is substituted with the traversal type.
	RecoveryFormat string
}
// DefaultRecoveryFormat is the recovery file name format applied when
// Params.RecoveryFormat is left empty; %s is replaced by the traversal type.
var DefaultRecoveryFormat = "./recover_validate_%s"

// emptyCodeHash is the Keccak-256 hash of empty input. Accounts whose code
// hash equals it are treated as having no contract code to look up.
var emptyCodeHash = crypto.Keccak256(nil)
2022-08-08 23:28:13 +00:00
// KVSWithAncient lets a plain ethdb.KeyValueStore be passed where an
// ethdb.Database is required by embedding the ethdb.Database interface.
//
// NOTE(review): NewKVSDatabaseWithAncient only sets kvs and leaves the
// embedded Database nil, and no delegating methods are defined in this file,
// so promoted ethdb.Database methods would be called on a nil interface.
// Confirm that nothing invokes them.
type KVSWithAncient struct {
	// kvs is the wrapped key-value store.
	kvs ethdb.KeyValueStore
	// Database supplies the ethdb.Database method set via embedding.
	ethdb.Database
}
// NewKVSDatabaseWithAncient wraps kvs in a KVSWithAncient and returns it as an
// ethdb.Database. Only the kvs field is populated.
func NewKVSDatabaseWithAncient(kvs ethdb.KeyValueStore) ethdb.Database {
	wrapped := new(KVSWithAncient)
	wrapped.kvs = kvs
	return wrapped
}
2020-07-13 00:57:47 +00:00
// NewPGIPFSValidator returns a new trie validator ontop of a connection pool for an IPFS backing Postgres database
2022-08-24 19:09:46 +00:00
func NewPGIPFSValidator(db *sqlx.DB, par Params) *Validator {
kvs := pgipfsethdb.NewKeyValueStore(db, pgipfsethdb.CacheConfig{
Name: "kv",
Size: 16 * 1000 * 1000, // 16MB
ExpiryDuration: time.Hour * 8, // 8 hours
})
database := pgipfsethdb.NewDatabase(db, pgipfsethdb.CacheConfig{
Name: "db",
Size: 16 * 1000 * 1000, // 16MB
ExpiryDuration: time.Hour * 8, // 8 hours
})
2022-08-24 19:09:46 +00:00
normalizeParams(&par)
2020-07-13 00:57:47 +00:00
return &Validator{
kvs: kvs,
2024-04-15 11:42:58 +00:00
trieDB: triedb.NewDatabase(NewKVSDatabaseWithAncient(kvs), nil),
2020-07-13 00:57:47 +00:00
stateDatabase: state.NewDatabase(database),
2021-12-29 19:44:32 +00:00
db: database.(*pgipfsethdb.Database),
2022-08-24 19:09:46 +00:00
params: par,
2020-07-13 00:57:47 +00:00
}
}
// GetCacheStats returns the groupcache statistics of the Postgres-backed
// database.
//
// Only NewPGIPFSValidator sets v.db. For validators built by the other
// constructors there is no such database, so zero-valued stats are returned
// instead of calling through a nil pointer.
func (v *Validator) GetCacheStats() groupcache.Stats {
	if v.db == nil {
		return groupcache.Stats{}
	}
	return v.db.GetCacheStats()
}
2020-07-13 00:57:47 +00:00
// NewIPFSValidator returns a new trie validator ontop of an IPFS blockservice
2022-08-24 19:09:46 +00:00
func NewIPFSValidator(bs blockservice.BlockService, par Params) *Validator {
2020-07-13 00:57:47 +00:00
kvs := ipfsethdb.NewKeyValueStore(bs)
database := ipfsethdb.NewDatabase(bs)
2022-08-24 19:09:46 +00:00
normalizeParams(&par)
2020-07-13 00:57:47 +00:00
return &Validator{
kvs: kvs,
2024-04-15 11:42:58 +00:00
trieDB: triedb.NewDatabase(NewKVSDatabaseWithAncient(kvs), nil),
2020-07-13 00:57:47 +00:00
stateDatabase: state.NewDatabase(database),
2022-08-24 19:09:46 +00:00
params: par,
2020-07-13 00:57:47 +00:00
}
}
2020-06-25 20:11:50 +00:00
// NewValidator returns a new trie validator
2020-06-26 18:11:30 +00:00
// Validating the completeness of a modified merkle patricia tries requires traversing the entire trie and verifying that
// every node is present, this is an expensive operation
2020-07-13 00:57:47 +00:00
func NewValidator(kvs ethdb.KeyValueStore, database ethdb.Database) *Validator {
2020-06-25 20:11:50 +00:00
return &Validator{
kvs: kvs,
2024-04-15 11:42:58 +00:00
trieDB: triedb.NewDatabase(NewKVSDatabaseWithAncient(kvs), nil),
2020-06-25 20:11:50 +00:00
stateDatabase: state.NewDatabase(database),
}
}
2022-08-24 19:09:46 +00:00
// Ensure params are valid
func normalizeParams(p *Params) {
if p.Workers == 0 {
p.Workers = 1
2022-08-08 23:28:13 +00:00
}
2022-08-24 19:09:46 +00:00
if len(p.RecoveryFormat) == 0 {
p.RecoveryFormat = DefaultRecoveryFormat
2022-08-08 23:28:13 +00:00
}
}
2020-06-26 18:11:30 +00:00
// ValidateTrie returns an error if the state and storage tries for the provided state root cannot be confirmed as complete
// This does consider child storage tries
func (v *Validator) ValidateTrie(stateRoot common.Hash) error {
2022-08-08 23:28:13 +00:00
t, err := v.stateDatabase.OpenTrie(stateRoot)
2020-06-25 20:11:50 +00:00
if err != nil {
2020-06-26 18:11:30 +00:00
return err
2020-06-25 20:11:50 +00:00
}
2024-04-15 11:42:58 +00:00
iterate := func(ctx context.Context, it trie.NodeIterator) error { return v.iterate(ctx, stateRoot, it, true) }
2022-08-24 19:09:46 +00:00
return iterateTracked(t, fmt.Sprintf(v.params.RecoveryFormat, fullTraversal), v.params.Workers, iterate)
2020-06-26 18:11:30 +00:00
}
// ValidateStateTrie returns an error if the state trie for the provided state root cannot be confirmed as complete
// This does not consider child storage tries
func (v *Validator) ValidateStateTrie(stateRoot common.Hash) error {
// Generate the trie.NodeIterator for this root
t, err := v.stateDatabase.OpenTrie(stateRoot)
if err != nil {
return err
}
2024-04-15 11:42:58 +00:00
iterate := func(ctx context.Context, it trie.NodeIterator) error { return v.iterate(ctx, stateRoot, it, false) }
2022-08-24 19:09:46 +00:00
return iterateTracked(t, fmt.Sprintf(v.params.RecoveryFormat, stateTraversal), v.params.Workers, iterate)
2020-06-26 18:11:30 +00:00
}
// ValidateStorageTrie returns an error if the storage trie for the provided storage root and contract address cannot be confirmed as complete
func (v *Validator) ValidateStorageTrie(stateRoot common.Hash, address common.Address, storageRoot common.Hash) error {
2020-06-26 18:11:30 +00:00
// Generate the state.NodeIterator for this root
addrHash := crypto.Keccak256Hash(address.Bytes())
2024-04-15 11:42:58 +00:00
// Note: the last argument is the redundant state trie, but will be needed for Verkle tries
storage, err := v.stateDatabase.OpenStorageTrie(stateRoot, addrHash, storageRoot, nil)
2020-06-26 18:11:30 +00:00
if err != nil {
return err
}
2024-04-15 11:42:58 +00:00
iterate := func(ctx context.Context, it trie.NodeIterator) error { return v.iterate(ctx, stateRoot, it, false) }
return iterateTracked(storage, fmt.Sprintf(v.params.RecoveryFormat, storageTraversal), v.params.Workers, iterate)
2020-06-25 20:11:50 +00:00
}
2021-10-11 15:18:45 +00:00
// Close implements io.Closer.
// It deregisters the groupcache groups named "kv" and "db" and always returns
// nil.
func (v *Validator) Close() error {
	for _, group := range []string{"kv", "db"} {
		groupcache.DeregisterGroup(group)
	}
	return nil
}
2022-08-24 19:09:46 +00:00
2022-08-30 14:48:12 +00:00
// Traverses one iterator fully
2022-08-24 19:09:46 +00:00
// If storage = true, also traverse storage tries for each leaf.
2024-04-15 11:42:58 +00:00
func (v *Validator) iterate(ctx context.Context, stateRoot common.Hash, it trie.NodeIterator, storage bool) error {
2022-08-24 19:09:46 +00:00
// Iterate through entire state trie. it.Next() will return false when we have
// either completed iteration of the entire trie or run into an error (e.g. a
// missing node). If we are able to iterate through the entire trie without error
// then the trie is complete.
for it.Next(true) {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
2022-08-24 19:09:46 +00:00
// This block adapted from geth - core/state/iterator.go
// If storage is not requested, or the state trie node is an internal entry, skip
if !storage || !it.Leaf() {
continue
}
// Otherwise we've reached an account node, initiate data iteration
var account types.StateAccount
if err := rlp.Decode(bytes.NewReader(it.LeafBlob()), &account); err != nil {
return err
}
2024-04-15 11:42:58 +00:00
// Note: the last argument is the redundant state trie, but will be needed for Verkle tries
dataTrie, err := v.stateDatabase.OpenStorageTrie(stateRoot, common.BytesToHash(it.LeafKey()), account.Root, nil)
if err != nil {
return err
}
dataIt, err := dataTrie.NodeIterator(nil)
2022-08-24 19:09:46 +00:00
if err != nil {
return err
}
2024-04-15 11:42:58 +00:00
2022-08-24 19:09:46 +00:00
if !bytes.Equal(account.CodeHash, emptyCodeHash) {
2024-04-15 11:42:58 +00:00
_, err := v.stateDatabase.ContractCode(common.Address{}, common.BytesToHash(account.CodeHash))
2022-08-24 19:09:46 +00:00
if err != nil {
return fmt.Errorf("code hash %x: %w (path %x)", account.CodeHash, err, iterutils.HexToKeyBytes(it.Path()))
2022-08-24 19:09:46 +00:00
}
}
for dataIt.Next(true) {
}
if dataIt.Error() != nil {
return fmt.Errorf("data iterator error (path %x): %w", iterutils.HexToKeyBytes(dataIt.Path()), dataIt.Error())
2022-08-24 19:09:46 +00:00
}
}
return it.Error()
}
2023-03-10 07:54:39 +00:00
// Traverses each iterator in a separate goroutine.
// Dumps to a recovery file on failure or interrupt.
func iterateTracked(
tree state.Trie,
recoveryFile string,
iterCount uint,
fn func(context.Context, trie.NodeIterator) error,
) error {
2022-08-24 19:09:46 +00:00
tracker := tracker.New(recoveryFile, iterCount)
halt := func() {
log.Errorf("writing recovery file: %s", recoveryFile)
if err := tracker.CloseAndSave(); err != nil {
2022-08-24 19:09:46 +00:00
log.Errorf("failed to write recovery file: %v", err)
}
}
// attempt to restore from recovery file if it exists
iters, _, err := tracker.Restore(tree.NodeIterator)
2022-08-24 19:09:46 +00:00
if err != nil {
return err
}
if iterCount < uint(len(iters)) {
return fmt.Errorf("recovered too many iterators: got %d, expected %d", len(iters), iterCount)
}
if iters == nil { // nothing restored
2024-04-15 11:42:58 +00:00
iters, err = iterutils.SubtrieIterators(tree.NodeIterator, iterCount)
if err != nil {
return err
}
2022-08-24 19:09:46 +00:00
for i, it := range iters {
iters[i] = tracker.Tracked(it)
2022-08-24 19:09:46 +00:00
}
} else {
log.Debugf("restored %d iterators from: %s", len(iters), recoveryFile)
2022-08-24 19:09:46 +00:00
}
ctx, cancel := context.WithCancel(context.Background())
2022-08-24 19:09:46 +00:00
g, ctx := errgroup.WithContext(ctx)
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
sig := <-sigChan
log.Errorf("Signal received (%v), stopping", sig)
cancel()
}()
defer halt()
2022-08-24 19:09:46 +00:00
for _, it := range iters {
func(it trie.NodeIterator) {
g.Go(func() error {
return fn(ctx, it)
})
2022-08-24 19:09:46 +00:00
}(it)
}
return g.Wait()
}