cmd, core, eth, graphql, trie: no persisted clean trie cache file (#27525)

The clean trie cache is persisted periodically, therefore Geth can
quickly warmup the cache in next restart.

However it will reduce the robustness of system. The assumption is
held in Geth that if the parent trie node is present, then the entire
sub-trie associated with the parent are all prensent.

Imagine the scenario that Geth rewinds itself to a past block and
restart, but Geth finds the root node of "future state" in clean
cache then regard this state is present in disk, while is not in fact.

Another example is offline pruning tool. Whenever an offline pruning
is performed, the clean cache file has to be removed to aviod hitting
the root node of "deleted states" in clean cache.

All in all, compare with the minor performance gain, system robustness
is something we care more.
This commit is contained in:
rjl493456442 2023-07-04 15:21:06 +08:00 committed by GitHub
parent 6ca3ef9a7b
commit 59f7b289c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 59 additions and 186 deletions

View File

@ -109,15 +109,13 @@ func setupGeth(stack *node.Node) error {
}
backend, err := eth.New(stack, &ethconfig.Config{
Genesis: &chain.genesis,
NetworkId: chain.genesis.Config.ChainID.Uint64(), // 19763
DatabaseCache: 10,
TrieCleanCache: 10,
TrieCleanCacheJournal: "",
TrieCleanCacheRejournal: 60 * time.Minute,
TrieDirtyCache: 16,
TrieTimeout: 60 * time.Minute,
SnapshotCache: 10,
Genesis: &chain.genesis,
NetworkId: chain.genesis.Config.ChainID.Uint64(), // 19763
DatabaseCache: 10,
TrieCleanCache: 10,
TrieDirtyCache: 16,
TrieTimeout: 60 * time.Minute,
SnapshotCache: 10,
})
if err != nil {
return err

View File

@ -276,6 +276,10 @@ func deprecated(field string) bool {
return true
case "ethconfig.Config.EWASMInterpreter":
return true
case "ethconfig.Config.TrieCleanCacheJournal":
return true
case "ethconfig.Config.TrieCleanCacheRejournal":
return true
default:
return false
}

View File

@ -50,7 +50,6 @@ var (
ArgsUsage: "<root>",
Action: pruneState,
Flags: flags.Merge([]cli.Flag{
utils.CacheTrieJournalFlag,
utils.BloomFilterSizeFlag,
}, utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
@ -160,7 +159,7 @@ block is used.
// Deprecation: this command should be deprecated once the hash-based
// scheme is deprecated.
func pruneState(ctx *cli.Context) error {
stack, config := makeConfigNode(ctx)
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, false)
@ -168,7 +167,6 @@ func pruneState(ctx *cli.Context) error {
prunerconfig := pruner.Config{
Datadir: stack.ResolvePath(""),
Cachedir: stack.ResolvePath(config.Eth.TrieCleanCacheJournal),
BloomSize: ctx.Uint64(utils.BloomFilterSizeFlag.Name),
}
pruner, err := pruner.NewPruner(chaindb, prunerconfig)

View File

@ -410,18 +410,6 @@ var (
Value: 15,
Category: flags.PerfCategory,
}
CacheTrieJournalFlag = &cli.StringFlag{
Name: "cache.trie.journal",
Usage: "Disk journal directory for trie cache to survive node restarts",
Value: ethconfig.Defaults.TrieCleanCacheJournal,
Category: flags.PerfCategory,
}
CacheTrieRejournalFlag = &cli.DurationFlag{
Name: "cache.trie.rejournal",
Usage: "Time interval to regenerate the trie cache journal",
Value: ethconfig.Defaults.TrieCleanCacheRejournal,
Category: flags.PerfCategory,
}
CacheGCFlag = &cli.IntFlag{
Name: "cache.gc",
Usage: "Percentage of cache memory allowance to use for trie pruning (default = 25% full mode, 0% archive mode)",
@ -1710,12 +1698,6 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) {
if ctx.IsSet(CacheFlag.Name) || ctx.IsSet(CacheTrieFlag.Name) {
cfg.TrieCleanCache = ctx.Int(CacheFlag.Name) * ctx.Int(CacheTrieFlag.Name) / 100
}
if ctx.IsSet(CacheTrieJournalFlag.Name) {
cfg.TrieCleanCacheJournal = ctx.String(CacheTrieJournalFlag.Name)
}
if ctx.IsSet(CacheTrieRejournalFlag.Name) {
cfg.TrieCleanCacheRejournal = ctx.Duration(CacheTrieRejournalFlag.Name)
}
if ctx.IsSet(CacheFlag.Name) || ctx.IsSet(CacheGCFlag.Name) {
cfg.TrieDirtyCache = ctx.Int(CacheFlag.Name) * ctx.Int(CacheGCFlag.Name) / 100
}

View File

@ -33,6 +33,8 @@ var ShowDeprecated = &cli.Command{
var DeprecatedFlags = []cli.Flag{
NoUSBFlag,
CacheTrieJournalFlag,
CacheTrieRejournalFlag,
}
var (
@ -42,6 +44,17 @@ var (
Usage: "Disables monitoring for and managing USB hardware wallets (deprecated)",
Category: flags.DeprecatedCategory,
}
// (Deprecated June 2023, shown in aliased flags section)
CacheTrieJournalFlag = &cli.StringFlag{
Name: "cache.trie.journal",
Usage: "Disk journal directory for trie cache to survive node restarts",
Category: flags.PerfCategory,
}
CacheTrieRejournalFlag = &cli.DurationFlag{
Name: "cache.trie.rejournal",
Usage: "Time interval to regenerate the trie cache journal",
Category: flags.PerfCategory,
}
)
// showDeprecated displays deprecated flags that will be soon removed from the codebase.

View File

@ -130,8 +130,6 @@ const (
// that's resident in a blockchain.
type CacheConfig struct {
TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory
TrieCleanJournal string // Disk journal for saving clean cache entries.
TrieCleanRejournal time.Duration // Time interval to dump clean cache to disk periodically
TrieCleanNoPrefetch bool // Whether to disable heuristic state prefetching for followup blocks
TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk
TrieDirtyDisabled bool // Whether to disable trie write caching and GC altogether (archive node)
@ -238,7 +236,6 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
// Open trie database with provided config
triedb := trie.NewDatabaseWithConfig(db, &trie.Config{
Cache: cacheConfig.TrieCleanLimit,
Journal: cacheConfig.TrieCleanJournal,
Preimages: cacheConfig.Preimages,
})
// Setup the genesis block, commit the provided genesis specification
@ -411,18 +408,6 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
bc.wg.Add(1)
go bc.updateFutureBlocks()
// If periodic cache journal is required, spin it up.
if bc.cacheConfig.TrieCleanRejournal > 0 {
if bc.cacheConfig.TrieCleanRejournal < time.Minute {
log.Warn("Sanitizing invalid trie cache journal time", "provided", bc.cacheConfig.TrieCleanRejournal, "updated", time.Minute)
bc.cacheConfig.TrieCleanRejournal = time.Minute
}
bc.wg.Add(1)
go func() {
defer bc.wg.Done()
bc.triedb.SaveCachePeriodically(bc.cacheConfig.TrieCleanJournal, bc.cacheConfig.TrieCleanRejournal, bc.quit)
}()
}
// Rewind the chain in case of an incompatible config upgrade.
if compat, ok := genesisErr.(*params.ConfigCompatError); ok {
log.Warn("Rewinding chain to upgrade configuration", "err", compat)
@ -987,11 +972,6 @@ func (bc *BlockChain) Stop() {
if err := bc.stateCache.TrieDB().Close(); err != nil {
log.Error("Failed to close trie db", "err", err)
}
// Ensure all live cached entries be saved into disk, so that we can skip
// cache warmup when node restarts.
if bc.cacheConfig.TrieCleanJournal != "" {
bc.triedb.SaveCache(bc.cacheConfig.TrieCleanJournal)
}
log.Info("Blockchain stopped")
}

View File

@ -57,7 +57,6 @@ const (
// Config includes all the configurations for pruning.
type Config struct {
Datadir string // The directory of the state database
Cachedir string // The directory of state clean cache
BloomSize uint64 // The Megabytes of memory allocated to bloom-filter
}
@ -241,7 +240,7 @@ func (p *Pruner) Prune(root common.Hash) error {
return err
}
if stateBloomRoot != (common.Hash{}) {
return RecoverPruning(p.config.Datadir, p.db, p.config.Cachedir)
return RecoverPruning(p.config.Datadir, p.db)
}
// If the target state root is not specified, use the HEAD-127 as the
// target. The reason for picking it is:
@ -299,12 +298,6 @@ func (p *Pruner) Prune(root common.Hash) error {
log.Info("Selecting user-specified state as the pruning target", "root", root)
}
}
// Before start the pruning, delete the clean trie cache first.
// It's necessary otherwise in the next restart we will hit the
// deleted state root in the "clean cache" so that the incomplete
// state is picked for usage.
deleteCleanTrieCache(p.config.Cachedir)
// All the state roots of the middle layer should be forcibly pruned,
// otherwise the dangling state will be left.
middleRoots := make(map[common.Hash]struct{})
@ -342,7 +335,7 @@ func (p *Pruner) Prune(root common.Hash) error {
// pruning can be resumed. What's more if the bloom filter is constructed, the
// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
// in the disk.
func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error {
func RecoverPruning(datadir string, db ethdb.Database) error {
stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir)
if err != nil {
return err
@ -378,12 +371,6 @@ func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) err
}
log.Info("Loaded state bloom filter", "path", stateBloomPath)
// Before start the pruning, delete the clean trie cache first.
// It's necessary otherwise in the next restart we will hit the
// deleted state root in the "clean cache" so that the incomplete
// state is picked for usage.
deleteCleanTrieCache(trieCachePath)
// All the state roots of the middle layers should be forcibly pruned,
// otherwise the dangling state will be left.
var (
@ -497,23 +484,3 @@ func findBloomFilter(datadir string) (string, common.Hash, error) {
}
return stateBloomPath, stateBloomRoot, nil
}
const warningLog = `
WARNING!
The clean trie cache is not found. Please delete it by yourself after the
pruning. Remember don't start the Geth without deleting the clean trie cache
otherwise the entire database may be damaged!
Check the command description "geth snapshot prune-state --help" for more details.
`
func deleteCleanTrieCache(path string) {
if !common.FileExist(path) {
log.Warn(warningLog)
return
}
os.RemoveAll(path)
log.Info("Deleted trie clean cache", "path", path)
}

View File

@ -132,7 +132,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) {
if err != nil {
return nil, err
}
if err := pruner.RecoverPruning(stack.ResolvePath(""), chainDb, stack.ResolvePath(config.TrieCleanCacheJournal)); err != nil {
if err := pruner.RecoverPruning(stack.ResolvePath(""), chainDb); err != nil {
log.Error("Failed to recover state", "error", err)
}
// Transfer mining-related config to the ethash config.
@ -184,8 +184,6 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) {
}
cacheConfig = &core.CacheConfig{
TrieCleanLimit: config.TrieCleanCache,
TrieCleanJournal: stack.ResolvePath(config.TrieCleanCacheJournal),
TrieCleanRejournal: config.TrieCleanCacheRejournal,
TrieCleanNoPrefetch: config.NoPrefetch,
TrieDirtyLimit: config.TrieDirtyCache,
TrieDirtyDisabled: config.NoPruning,

View File

@ -57,25 +57,23 @@ var LightClientGPO = gasprice.Config{
// Defaults contains default settings for use on the Ethereum main net.
var Defaults = Config{
SyncMode: downloader.SnapSync,
NetworkId: 1,
TxLookupLimit: 2350000,
LightPeers: 100,
UltraLightFraction: 75,
DatabaseCache: 512,
TrieCleanCache: 154,
TrieCleanCacheJournal: "triecache",
TrieCleanCacheRejournal: 60 * time.Minute,
TrieDirtyCache: 256,
TrieTimeout: 60 * time.Minute,
SnapshotCache: 102,
FilterLogCacheSize: 32,
Miner: miner.DefaultConfig,
TxPool: legacypool.DefaultConfig,
RPCGasCap: 50000000,
RPCEVMTimeout: 5 * time.Second,
GPO: FullNodeGPO,
RPCTxFeeCap: 1, // 1 ether
SyncMode: downloader.SnapSync,
NetworkId: 1,
TxLookupLimit: 2350000,
LightPeers: 100,
UltraLightFraction: 75,
DatabaseCache: 512,
TrieCleanCache: 154,
TrieDirtyCache: 256,
TrieTimeout: 60 * time.Minute,
SnapshotCache: 102,
FilterLogCacheSize: 32,
Miner: miner.DefaultConfig,
TxPool: legacypool.DefaultConfig,
RPCGasCap: 50000000,
RPCEVMTimeout: 5 * time.Second,
GPO: FullNodeGPO,
RPCTxFeeCap: 1, // 1 ether
}
//go:generate go run github.com/fjl/gencodec -type Config -formats toml -out gen_config.go
@ -124,13 +122,11 @@ type Config struct {
DatabaseCache int
DatabaseFreezer string
TrieCleanCache int
TrieCleanCacheJournal string `toml:",omitempty"` // Disk journal directory for trie cache to survive node restarts
TrieCleanCacheRejournal time.Duration `toml:",omitempty"` // Time interval to regenerate the journal for clean cache
TrieDirtyCache int
TrieTimeout time.Duration
SnapshotCache int
Preimages bool
TrieCleanCache int
TrieDirtyCache int
TrieTimeout time.Duration
SnapshotCache int
Preimages bool
// This is the number of blocks for which logs will be cached in the filter system.
FilterLogCacheSize int

View File

@ -39,8 +39,6 @@ func (c Config) MarshalTOML() (interface{}, error) {
DatabaseCache int
DatabaseFreezer string
TrieCleanCache int
TrieCleanCacheJournal string `toml:",omitempty"`
TrieCleanCacheRejournal time.Duration `toml:",omitempty"`
TrieDirtyCache int
TrieTimeout time.Duration
SnapshotCache int
@ -81,8 +79,6 @@ func (c Config) MarshalTOML() (interface{}, error) {
enc.DatabaseCache = c.DatabaseCache
enc.DatabaseFreezer = c.DatabaseFreezer
enc.TrieCleanCache = c.TrieCleanCache
enc.TrieCleanCacheJournal = c.TrieCleanCacheJournal
enc.TrieCleanCacheRejournal = c.TrieCleanCacheRejournal
enc.TrieDirtyCache = c.TrieDirtyCache
enc.TrieTimeout = c.TrieTimeout
enc.SnapshotCache = c.SnapshotCache
@ -127,8 +123,6 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error {
DatabaseCache *int
DatabaseFreezer *string
TrieCleanCache *int
TrieCleanCacheJournal *string `toml:",omitempty"`
TrieCleanCacheRejournal *time.Duration `toml:",omitempty"`
TrieDirtyCache *int
TrieTimeout *time.Duration
SnapshotCache *int
@ -218,12 +212,6 @@ func (c *Config) UnmarshalTOML(unmarshal func(interface{}) error) error {
if dec.TrieCleanCache != nil {
c.TrieCleanCache = *dec.TrieCleanCache
}
if dec.TrieCleanCacheJournal != nil {
c.TrieCleanCacheJournal = *dec.TrieCleanCacheJournal
}
if dec.TrieCleanCacheRejournal != nil {
c.TrieCleanCacheRejournal = *dec.TrieCleanCacheRejournal
}
if dec.TrieDirtyCache != nil {
c.TrieDirtyCache = *dec.TrieDirtyCache
}

View File

@ -438,14 +438,12 @@ func createNode(t *testing.T) *node.Node {
func newGQLService(t *testing.T, stack *node.Node, shanghai bool, gspec *core.Genesis, genBlocks int, genfunc func(i int, gen *core.BlockGen)) (*handler, []*types.Block) {
ethConf := &ethconfig.Config{
Genesis: gspec,
NetworkId: 1337,
TrieCleanCache: 5,
TrieCleanCacheJournal: "triecache",
TrieCleanCacheRejournal: 60 * time.Minute,
TrieDirtyCache: 5,
TrieTimeout: 60 * time.Minute,
SnapshotCache: 5,
Genesis: gspec,
NetworkId: 1337,
TrieCleanCache: 5,
TrieDirtyCache: 5,
TrieTimeout: 60 * time.Minute,
SnapshotCache: 5,
}
ethBackend, err := eth.New(stack, ethConf)
if err != nil {

View File

@ -71,7 +71,6 @@ func getChain() *core.BlockChain {
TrieDirtyLimit: 0,
TrieTimeLimit: 5 * time.Minute,
TrieCleanNoPrefetch: true,
TrieCleanRejournal: 0,
SnapshotLimit: 100,
SnapshotWait: true,
}

View File

@ -18,22 +18,18 @@ package trie
import (
"errors"
"runtime"
"time"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie/triedb/hashdb"
"github.com/ethereum/go-ethereum/trie/trienode"
)
// Config defines all necessary options for database.
type Config struct {
Cache int // Memory allowance (MB) to use for caching trie nodes in memory
Journal string // Journal of clean cache to survive node restarts
Preimages bool // Flag whether the preimage of trie key is recorded
Cache int // Memory allowance (MB) to use for caching trie nodes in memory
Preimages bool // Flag whether the preimage of trie key is recorded
}
// backend defines the methods needed to access/update trie nodes in different
@ -79,11 +75,7 @@ type Database struct {
func prepare(diskdb ethdb.Database, config *Config) *Database {
var cleans *fastcache.Cache
if config != nil && config.Cache > 0 {
if config.Journal == "" {
cleans = fastcache.New(config.Cache * 1024 * 1024)
} else {
cleans = fastcache.LoadFromFileOrNew(config.Journal, config.Cache*1024*1024)
}
cleans = fastcache.New(config.Cache * 1024 * 1024)
}
var preimages *preimageStore
if config != nil && config.Preimages {
@ -179,46 +171,6 @@ func (db *Database) WritePreimages() {
}
}
// saveCache saves clean state cache to given directory path
// using specified CPU cores.
func (db *Database) saveCache(dir string, threads int) error {
if db.cleans == nil {
return nil
}
log.Info("Writing clean trie cache to disk", "path", dir, "threads", threads)
start := time.Now()
err := db.cleans.SaveToFileConcurrent(dir, threads)
if err != nil {
log.Error("Failed to persist clean trie cache", "error", err)
return err
}
log.Info("Persisted the clean trie cache", "path", dir, "elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
// SaveCache atomically saves fast cache data to the given dir using all
// available CPU cores.
func (db *Database) SaveCache(dir string) error {
return db.saveCache(dir, runtime.GOMAXPROCS(0))
}
// SaveCachePeriodically atomically saves fast cache data to the given dir with
// the specified interval. All dump operation will only use a single CPU core.
func (db *Database) SaveCachePeriodically(dir string, interval time.Duration, stopCh <-chan struct{}) {
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
db.saveCache(dir, 1)
case <-stopCh:
return
}
}
}
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold. The held pre-images accumulated
// up to this point will be flushed in case the size exceeds the threshold.