cmd/{geth,utils}: add cmd to export preimages in snap enumeration order (#28256)

Adds a subcommand: `geth snapshot export-preimages`, to export preimages of every hash found during a snapshot enumeration: that is, it exports _only the active state_, and not _all_ preimages that have been used but are no longer part of the state. 

This tool is needed for the verkle transition, in order to distribute the preimages needed for the conversion. Since only the 'active' preimages are exported, the output is shrunk from ~70GB to ~4GB.

The order of the output is the order used by the snapshot enumeration, which avoids database thrashing. However, it also means that storage-slot preimages are not deduplicated.
This commit is contained in:
Guillaume Ballet 2023-11-22 14:48:25 +01:00 committed by GitHub
parent 5ff929c22f
commit d468c333a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 154 additions and 34 deletions

View File

@ -137,20 +137,7 @@ The import-preimages command imports hash preimages from an RLP encoded stream.
It's deprecated, please use "geth db import" instead.
`,
}
exportPreimagesCommand = &cli.Command{
Action: exportPreimages,
Name: "export-preimages",
Usage: "Export the preimage database into an RLP stream",
ArgsUsage: "<dumpfile>",
Flags: flags.Merge([]cli.Flag{
utils.CacheFlag,
utils.SyncModeFlag,
}, utils.DatabaseFlags),
Description: `
The export-preimages command exports hash preimages to an RLP encoded stream.
It's deprecated, please use "geth db export" instead.
`,
}
dumpCommand = &cli.Command{
Action: dump,
Name: "dump",
@ -386,6 +373,9 @@ func exportChain(ctx *cli.Context) error {
}
// importPreimages imports preimage data from the specified file.
// it is deprecated, and the export function has been removed, but
// the import function is kept around for the time being so that
// older file formats can still be imported.
func importPreimages(ctx *cli.Context) error {
if ctx.Args().Len() < 1 {
utils.Fatalf("This command requires an argument.")
@ -405,25 +395,6 @@ func importPreimages(ctx *cli.Context) error {
return nil
}
// exportPreimages dumps the preimage data to specified json file in streaming way.
func exportPreimages(ctx *cli.Context) error {
if ctx.Args().Len() < 1 {
utils.Fatalf("This command requires an argument.")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, true)
defer db.Close()
start := time.Now()
if err := utils.ExportPreimages(db, ctx.Args().First()); err != nil {
utils.Fatalf("Export error: %v\n", err)
}
fmt.Printf("Export done in %v\n", time.Since(start))
return nil
}
func parseDumpConfig(ctx *cli.Context, stack *node.Node) (*state.DumpConfig, ethdb.Database, common.Hash, error) {
db := utils.MakeChainDatabase(ctx, stack, true)
defer db.Close()

View File

@ -208,7 +208,6 @@ func init() {
importCommand,
exportCommand,
importPreimagesCommand,
exportPreimagesCommand,
removedbCommand,
dumpCommand,
dumpGenesisCommand,

View File

@ -20,6 +20,7 @@ import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"time"
@ -147,6 +148,17 @@ as the backend data source, making this command a lot faster.
The argument is interpreted as block number or hash. If none is provided, the latest
block is used.
`,
},
{
Action: snapshotExportPreimages,
Name: "export-preimages",
Usage: "Export the preimage in snapshot enumeration order",
ArgsUsage: "<dumpfile> [<root>]",
Flags: utils.DatabaseFlags,
Description: `
The export-preimages command exports hash preimages to a flat file, in exactly
the expected order for the overlay tree migration.
`,
},
},
@ -604,6 +616,48 @@ func dumpState(ctx *cli.Context) error {
return nil
}
// snapshotExportPreimages dumps the preimage data to a flat file.
func snapshotExportPreimages(ctx *cli.Context) error {
if ctx.NArg() < 1 {
utils.Fatalf("This command requires an argument.")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true)
defer triedb.Close()
var root common.Hash
if ctx.NArg() > 1 {
rootBytes := common.FromHex(ctx.Args().Get(1))
if len(rootBytes) != common.HashLength {
return fmt.Errorf("invalid hash: %s", ctx.Args().Get(1))
}
root = common.BytesToHash(rootBytes)
} else {
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
root = headBlock.Root()
}
snapConfig := snapshot.Config{
CacheSize: 256,
Recovery: false,
NoBuild: true,
AsyncBuild: false,
}
snaptree, err := snapshot.New(snapConfig, chaindb, triedb, root)
if err != nil {
return err
}
return utils.ExportSnapshotPreimages(chaindb, snaptree, ctx.Args().First(), root)
}
// checkAccount iterates the snap data layers, and looks up the given account
// across all layers.
func checkAccount(ctx *cli.Context) error {

View File

@ -33,6 +33,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/eth/ethconfig"
@ -374,6 +375,101 @@ func ExportPreimages(db ethdb.Database, fn string) error {
return nil
}
// ExportSnapshotPreimages exports the preimages corresponding to the enumeration of
// the snapshot for a given root.
func ExportSnapshotPreimages(chaindb ethdb.Database, snaptree *snapshot.Tree, fn string, root common.Hash) error {
log.Info("Exporting preimages", "file", fn)
fh, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.ModePerm)
if err != nil {
return err
}
defer fh.Close()
// Enable gzip compressing if file name has gz suffix.
var writer io.Writer = fh
if strings.HasSuffix(fn, ".gz") {
gz := gzip.NewWriter(writer)
defer gz.Close()
writer = gz
}
buf := bufio.NewWriter(writer)
defer buf.Flush()
writer = buf
type hashAndPreimageSize struct {
Hash common.Hash
Size int
}
hashCh := make(chan hashAndPreimageSize)
var (
start = time.Now()
logged = time.Now()
preimages int
)
go func() {
defer close(hashCh)
accIt, err := snaptree.AccountIterator(root, common.Hash{})
if err != nil {
log.Error("Failed to create account iterator", "error", err)
return
}
defer accIt.Release()
for accIt.Next() {
acc, err := types.FullAccount(accIt.Account())
if err != nil {
log.Error("Failed to get full account", "error", err)
return
}
preimages += 1
hashCh <- hashAndPreimageSize{Hash: accIt.Hash(), Size: common.AddressLength}
if acc.Root != (common.Hash{}) && acc.Root != types.EmptyRootHash {
stIt, err := snaptree.StorageIterator(root, accIt.Hash(), common.Hash{})
if err != nil {
log.Error("Failed to create storage iterator", "error", err)
return
}
for stIt.Next() {
preimages += 1
hashCh <- hashAndPreimageSize{Hash: stIt.Hash(), Size: common.HashLength}
if time.Since(logged) > time.Second*8 {
logged = time.Now()
log.Info("Exporting preimages", "count", preimages, "elapsed", common.PrettyDuration(time.Since(start)))
}
}
stIt.Release()
}
if time.Since(logged) > time.Second*8 {
logged = time.Now()
log.Info("Exporting preimages", "count", preimages, "elapsed", common.PrettyDuration(time.Since(start)))
}
}
}()
for item := range hashCh {
preimage := rawdb.ReadPreimage(chaindb, item.Hash)
if len(preimage) == 0 {
return fmt.Errorf("missing preimage for %v", item.Hash)
}
if len(preimage) != item.Size {
return fmt.Errorf("invalid preimage size, have %d", len(preimage))
}
rlpenc, err := rlp.EncodeToBytes(preimage)
if err != nil {
return fmt.Errorf("error encoding preimage: %w", err)
}
if _, err := writer.Write(rlpenc); err != nil {
return fmt.Errorf("failed to write preimage: %w", err)
}
}
log.Info("Exported preimages", "count", preimages, "elapsed", common.PrettyDuration(time.Since(start)), "file", fn)
return nil
}
// exportHeader is used in the export/import flow. When we do an export,
// the first element we output is the exportHeader.
// Whenever a backwards-incompatible change is made, the Version header