go-ethereum/cmd/geth/snapshot.go
Guillaume Ballet d468c333a7
cmd/{geth,utils}: add cmd to export preimages in snap enumeration order (#28256)
Adds a subcommand: `geth snapshot export-preimages`, to export preimages of every hash found during a snapshot enumeration: that is, it exports _only the active state_, and not _all_ preimages that have been used but are no longer part of the state. 

This tool is needed for the verkle transition, in order to distribute the preimages needed for the conversion. Since only the 'active' preimages are exported, the output is shrunk from ~70GB to ~4GB.

The order of the output is the order used by the snapshot enumeration, which avoids database thrashing. However, it also means that storage-slot preimages are not deduplicated.
2023-11-22 14:48:25 +01:00

692 lines
21 KiB
Go

// Copyright 2021 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"time"
"github.com/ethereum/go-ethereum/cmd/utils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/state/pruner"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/flags"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
cli "github.com/urfave/cli/v2"
)
var (
snapshotCommand = &cli.Command{
Name: "snapshot",
Usage: "A set of commands based on the snapshot",
Description: "",
Subcommands: []*cli.Command{
{
Name: "prune-state",
Usage: "Prune stale ethereum state data based on the snapshot",
ArgsUsage: "<root>",
Action: pruneState,
Flags: flags.Merge([]cli.Flag{
utils.BloomFilterSizeFlag,
}, utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth snapshot prune-state <state-root>
will prune historical state data with the help of the state snapshot.
All trie nodes and contract codes that do not belong to the specified
version state will be deleted from the database. After pruning, only
two version states are available: genesis and the specific one.
The default pruning target is the HEAD-127 state.
WARNING: it's only supported in hash mode(--state.scheme=hash)".
`,
},
{
Name: "verify-state",
Usage: "Recalculate state hash based on the snapshot for verification",
ArgsUsage: "<root>",
Action: verifyState,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth snapshot verify-state <state-root>
will traverse the whole accounts and storages set based on the specified
snapshot and recalculate the root hash of state for verification.
In other words, this command does the snapshot to trie conversion.
`,
},
{
Name: "check-dangling-storage",
Usage: "Check that there is no 'dangling' snap storage",
ArgsUsage: "<root>",
Action: checkDanglingStorage,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth snapshot check-dangling-storage <state-root> traverses the snap storage
data, and verifies that all snapshot storage data has a corresponding account.
`,
},
{
Name: "inspect-account",
Usage: "Check all snapshot layers for the a specific account",
ArgsUsage: "<address | hash>",
Action: checkAccount,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth snapshot inspect-account <address | hash> checks all snapshot layers and prints out
information about the specified address.
`,
},
{
Name: "traverse-state",
Usage: "Traverse the state with given root hash and perform quick verification",
ArgsUsage: "<root>",
Action: traverseState,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth snapshot traverse-state <state-root>
will traverse the whole state from the given state root and will abort if any
referenced trie node or contract code is missing. This command can be used for
state integrity verification. The default checking target is the HEAD state.
It's also usable without snapshot enabled.
`,
},
{
Name: "traverse-rawstate",
Usage: "Traverse the state with given root hash and perform detailed verification",
ArgsUsage: "<root>",
Action: traverseRawState,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabaseFlags),
Description: `
geth snapshot traverse-rawstate <state-root>
will traverse the whole state from the given root and will abort if any referenced
trie node or contract code is missing. This command can be used for state integrity
verification. The default checking target is the HEAD state. It's basically identical
to traverse-state, but the check granularity is smaller.
It's also usable without snapshot enabled.
`,
},
{
Name: "dump",
Usage: "Dump a specific block from storage (same as 'geth dump' but using snapshots)",
ArgsUsage: "[? <blockHash> | <blockNum>]",
Action: dumpState,
Flags: flags.Merge([]cli.Flag{
utils.ExcludeCodeFlag,
utils.ExcludeStorageFlag,
utils.StartKeyFlag,
utils.DumpLimitFlag,
}, utils.NetworkFlags, utils.DatabaseFlags),
Description: `
This command is semantically equivalent to 'geth dump', but uses the snapshots
as the backend data source, making this command a lot faster.
The argument is interpreted as block number or hash. If none is provided, the latest
block is used.
`,
},
{
Action: snapshotExportPreimages,
Name: "export-preimages",
Usage: "Export the preimage in snapshot enumeration order",
ArgsUsage: "<dumpfile> [<root>]",
Flags: utils.DatabaseFlags,
Description: `
The export-preimages command exports hash preimages to a flat file, in exactly
the expected order for the overlay tree migration.
`,
},
},
}
)
// Deprecation: this command should be deprecated once the hash-based
// scheme is deprecated.
func pruneState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, false)
defer chaindb.Close()
if rawdb.ReadStateScheme(chaindb) != rawdb.HashScheme {
log.Crit("Offline pruning is not required for path scheme")
}
prunerconfig := pruner.Config{
Datadir: stack.ResolvePath(""),
BloomSize: ctx.Uint64(utils.BloomFilterSizeFlag.Name),
}
pruner, err := pruner.NewPruner(chaindb, prunerconfig)
if err != nil {
log.Error("Failed to open snapshot tree", "err", err)
return err
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var targetRoot common.Hash
if ctx.NArg() == 1 {
targetRoot, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
}
if err = pruner.Prune(targetRoot); err != nil {
log.Error("Failed to prune state", "err", err)
return err
}
return nil
}
func verifyState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true, false)
defer triedb.Close()
snapConfig := snapshot.Config{
CacheSize: 256,
Recovery: false,
NoBuild: true,
AsyncBuild: false,
}
snaptree, err := snapshot.New(snapConfig, chaindb, triedb, headBlock.Root())
if err != nil {
log.Error("Failed to open snapshot tree", "err", err)
return err
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var root = headBlock.Root()
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
}
if err := snaptree.Verify(root); err != nil {
log.Error("Failed to verify state", "root", root, "err", err)
return err
}
log.Info("Verified the state", "root", root)
return snapshot.CheckDanglingStorage(chaindb)
}
// checkDanglingStorage iterates the snap storage data, and verifies that all
// storage also has corresponding account data.
func checkDanglingStorage(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, true)
defer db.Close()
return snapshot.CheckDanglingStorage(db)
}
// traverseState is a helper function used for pruning verification.
// Basically it just iterates the trie, ensure all nodes and associated
// contract codes are present.
func traverseState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true, false)
defer triedb.Close()
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var (
root common.Hash
err error
)
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
log.Info("Start traversing the state", "root", root)
} else {
root = headBlock.Root()
log.Info("Start traversing the state", "root", root, "number", headBlock.NumberU64())
}
t, err := trie.NewStateTrie(trie.StateTrieID(root), triedb)
if err != nil {
log.Error("Failed to open trie", "root", root, "err", err)
return err
}
var (
accounts int
slots int
codes int
lastReport time.Time
start = time.Now()
)
acctIt, err := t.NodeIterator(nil)
if err != nil {
log.Error("Failed to open iterator", "root", root, "err", err)
return err
}
accIter := trie.NewIterator(acctIt)
for accIter.Next() {
accounts += 1
var acc types.StateAccount
if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil {
log.Error("Invalid account encountered during traversal", "err", err)
return err
}
if acc.Root != types.EmptyRootHash {
id := trie.StorageTrieID(root, common.BytesToHash(accIter.Key), acc.Root)
storageTrie, err := trie.NewStateTrie(id, triedb)
if err != nil {
log.Error("Failed to open storage trie", "root", acc.Root, "err", err)
return err
}
storageIt, err := storageTrie.NodeIterator(nil)
if err != nil {
log.Error("Failed to open storage iterator", "root", acc.Root, "err", err)
return err
}
storageIter := trie.NewIterator(storageIt)
for storageIter.Next() {
slots += 1
if time.Since(lastReport) > time.Second*8 {
log.Info("Traversing state", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
}
if storageIter.Err != nil {
log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Err)
return storageIter.Err
}
}
if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes()) {
if !rawdb.HasCode(chaindb, common.BytesToHash(acc.CodeHash)) {
log.Error("Code is missing", "hash", common.BytesToHash(acc.CodeHash))
return errors.New("missing code")
}
codes += 1
}
if time.Since(lastReport) > time.Second*8 {
log.Info("Traversing state", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
}
if accIter.Err != nil {
log.Error("Failed to traverse state trie", "root", root, "err", accIter.Err)
return accIter.Err
}
log.Info("State is complete", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
// traverseRawState is a helper function used for pruning verification.
// Basically it just iterates the trie, ensure all nodes and associated
// contract codes are present. It's basically identical to traverseState
// but it will check each trie node.
func traverseRawState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true, false)
defer triedb.Close()
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var (
root common.Hash
err error
)
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
log.Info("Start traversing the state", "root", root)
} else {
root = headBlock.Root()
log.Info("Start traversing the state", "root", root, "number", headBlock.NumberU64())
}
t, err := trie.NewStateTrie(trie.StateTrieID(root), triedb)
if err != nil {
log.Error("Failed to open trie", "root", root, "err", err)
return err
}
var (
nodes int
accounts int
slots int
codes int
lastReport time.Time
start = time.Now()
hasher = crypto.NewKeccakState()
got = make([]byte, 32)
)
accIter, err := t.NodeIterator(nil)
if err != nil {
log.Error("Failed to open iterator", "root", root, "err", err)
return err
}
reader, err := triedb.Reader(root)
if err != nil {
log.Error("State is non-existent", "root", root)
return nil
}
for accIter.Next(true) {
nodes += 1
node := accIter.Hash()
// Check the present for non-empty hash node(embedded node doesn't
// have their own hash).
if node != (common.Hash{}) {
blob, _ := reader.Node(common.Hash{}, accIter.Path(), node)
if len(blob) == 0 {
log.Error("Missing trie node(account)", "hash", node)
return errors.New("missing account")
}
hasher.Reset()
hasher.Write(blob)
hasher.Read(got)
if !bytes.Equal(got, node.Bytes()) {
log.Error("Invalid trie node(account)", "hash", node.Hex(), "value", blob)
return errors.New("invalid account node")
}
}
// If it's a leaf node, yes we are touching an account,
// dig into the storage trie further.
if accIter.Leaf() {
accounts += 1
var acc types.StateAccount
if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
log.Error("Invalid account encountered during traversal", "err", err)
return errors.New("invalid account")
}
if acc.Root != types.EmptyRootHash {
id := trie.StorageTrieID(root, common.BytesToHash(accIter.LeafKey()), acc.Root)
storageTrie, err := trie.NewStateTrie(id, triedb)
if err != nil {
log.Error("Failed to open storage trie", "root", acc.Root, "err", err)
return errors.New("missing storage trie")
}
storageIter, err := storageTrie.NodeIterator(nil)
if err != nil {
log.Error("Failed to open storage iterator", "root", acc.Root, "err", err)
return err
}
for storageIter.Next(true) {
nodes += 1
node := storageIter.Hash()
// Check the presence for non-empty hash node(embedded node doesn't
// have their own hash).
if node != (common.Hash{}) {
blob, _ := reader.Node(common.BytesToHash(accIter.LeafKey()), storageIter.Path(), node)
if len(blob) == 0 {
log.Error("Missing trie node(storage)", "hash", node)
return errors.New("missing storage")
}
hasher.Reset()
hasher.Write(blob)
hasher.Read(got)
if !bytes.Equal(got, node.Bytes()) {
log.Error("Invalid trie node(storage)", "hash", node.Hex(), "value", blob)
return errors.New("invalid storage node")
}
}
// Bump the counter if it's leaf node.
if storageIter.Leaf() {
slots += 1
}
if time.Since(lastReport) > time.Second*8 {
log.Info("Traversing state", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
}
if storageIter.Error() != nil {
log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Error())
return storageIter.Error()
}
}
if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes()) {
if !rawdb.HasCode(chaindb, common.BytesToHash(acc.CodeHash)) {
log.Error("Code is missing", "account", common.BytesToHash(accIter.LeafKey()))
return errors.New("missing code")
}
codes += 1
}
if time.Since(lastReport) > time.Second*8 {
log.Info("Traversing state", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
}
}
if accIter.Error() != nil {
log.Error("Failed to traverse state trie", "root", root, "err", accIter.Error())
return accIter.Error()
}
log.Info("State is complete", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
func parseRoot(input string) (common.Hash, error) {
var h common.Hash
if err := h.UnmarshalText([]byte(input)); err != nil {
return h, err
}
return h, nil
}
func dumpState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
conf, db, root, err := parseDumpConfig(ctx, stack)
if err != nil {
return err
}
triedb := utils.MakeTrieDatabase(ctx, db, false, true, false)
defer triedb.Close()
snapConfig := snapshot.Config{
CacheSize: 256,
Recovery: false,
NoBuild: true,
AsyncBuild: false,
}
snaptree, err := snapshot.New(snapConfig, db, triedb, root)
if err != nil {
return err
}
accIt, err := snaptree.AccountIterator(root, common.BytesToHash(conf.Start))
if err != nil {
return err
}
defer accIt.Release()
log.Info("Snapshot dumping started", "root", root)
var (
start = time.Now()
logged = time.Now()
accounts uint64
)
enc := json.NewEncoder(os.Stdout)
enc.Encode(struct {
Root common.Hash `json:"root"`
}{root})
for accIt.Next() {
account, err := types.FullAccount(accIt.Account())
if err != nil {
return err
}
da := &state.DumpAccount{
Balance: account.Balance.String(),
Nonce: account.Nonce,
Root: account.Root.Bytes(),
CodeHash: account.CodeHash,
SecureKey: accIt.Hash().Bytes(),
}
if !conf.SkipCode && !bytes.Equal(account.CodeHash, types.EmptyCodeHash.Bytes()) {
da.Code = rawdb.ReadCode(db, common.BytesToHash(account.CodeHash))
}
if !conf.SkipStorage {
da.Storage = make(map[common.Hash]string)
stIt, err := snaptree.StorageIterator(root, accIt.Hash(), common.Hash{})
if err != nil {
return err
}
for stIt.Next() {
da.Storage[stIt.Hash()] = common.Bytes2Hex(stIt.Slot())
}
}
enc.Encode(da)
accounts++
if time.Since(logged) > 8*time.Second {
log.Info("Snapshot dumping in progress", "at", accIt.Hash(), "accounts", accounts,
"elapsed", common.PrettyDuration(time.Since(start)))
logged = time.Now()
}
if conf.Max > 0 && accounts >= conf.Max {
break
}
}
log.Info("Snapshot dumping complete", "accounts", accounts,
"elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
// snapshotExportPreimages dumps the preimage data to a flat file.
func snapshotExportPreimages(ctx *cli.Context) error {
if ctx.NArg() < 1 {
utils.Fatalf("This command requires an argument.")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true)
defer triedb.Close()
var root common.Hash
if ctx.NArg() > 1 {
rootBytes := common.FromHex(ctx.Args().Get(1))
if len(rootBytes) != common.HashLength {
return fmt.Errorf("invalid hash: %s", ctx.Args().Get(1))
}
root = common.BytesToHash(rootBytes)
} else {
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
root = headBlock.Root()
}
snapConfig := snapshot.Config{
CacheSize: 256,
Recovery: false,
NoBuild: true,
AsyncBuild: false,
}
snaptree, err := snapshot.New(snapConfig, chaindb, triedb, root)
if err != nil {
return err
}
return utils.ExportSnapshotPreimages(chaindb, snaptree, ctx.Args().First(), root)
}
// checkAccount iterates the snap data layers, and looks up the given account
// across all layers.
func checkAccount(ctx *cli.Context) error {
if ctx.NArg() != 1 {
return errors.New("need <address|hash> arg")
}
var (
hash common.Hash
addr common.Address
)
switch arg := ctx.Args().First(); len(arg) {
case 40, 42:
addr = common.HexToAddress(arg)
hash = crypto.Keccak256Hash(addr.Bytes())
case 64, 66:
hash = common.HexToHash(arg)
default:
return errors.New("malformed address or hash")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
start := time.Now()
log.Info("Checking difflayer journal", "address", addr, "hash", hash)
if err := snapshot.CheckJournalAccount(chaindb, hash); err != nil {
return err
}
log.Info("Checked the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start)))
return nil
}