From 4a4abc41d45915a6eb8376e569b14db84ad7328a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Thu, 22 Nov 2018 14:09:04 +0200 Subject: [PATCH 1/2] trie: approximate the wasted cache metaspace closer --- trie/database.go | 58 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/trie/database.go b/trie/database.go index 9a4c05d15..49a696bef 100644 --- a/trie/database.go +++ b/trie/database.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "io" + "reflect" "sync" "time" @@ -84,7 +85,8 @@ type Database struct { flushnodes uint64 // Nodes flushed since last commit flushsize common.StorageSize // Data storage flushed since last commit - dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. flushlist) + dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. metadata) + childrenSize common.StorageSize // Storage size of the external children tracking preimagesSize common.StorageSize // Storage size of the preimages cache lock sync.RWMutex @@ -146,6 +148,15 @@ type cachedNode struct { flushNext common.Hash // Next node in the flush-list } +// cachedNodeSize is the raw size of a cachedNode data structure without any +// node data included. It's an approximate size, but should be a lot better +// than not counting them. +var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size()) + +// cachedNodeChildrenSize is the raw size of an initialized but empty external +// reference map. +const cachedNodeChildrenSize = 48 + // rlp returns the raw rlp encoded blob of the cached node, either directly from // the cache, or by regenerating it from the collapsed node. func (n *cachedNode) rlp() []byte { @@ -300,9 +311,11 @@ func NewDatabaseWithCache(diskdb ethdb.KeyValueStore, cache int) *Database { }) } return &Database{ - diskdb: diskdb, - cleans: cleans, - dirties: map[common.Hash]*cachedNode{{}: {}}, + diskdb: diskdb, + cleans: cleans, + dirties: map[common.Hash]*cachedNode{{}: { + children: make(map[common.Hash]uint16), + }}, preimages: make(map[common.Hash][]byte), } } @@ -491,11 +504,15 @@ func (db *Database) reference(child common.Hash, parent common.Hash) { // If the reference already exists, only duplicate for roots if db.dirties[parent].children == nil { db.dirties[parent].children = make(map[common.Hash]uint16) + db.childrenSize += cachedNodeChildrenSize } else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) { return } node.parents++ db.dirties[parent].children[child]++ + if db.dirties[parent].children[child] == 1 { + db.childrenSize += common.HashLength + 2 // uint16 counter + } } // Dereference removes an existing reference from a root node. @@ -532,6 +549,7 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) { node.children[child]-- if node.children[child] == 0 { delete(node.children, child) + db.childrenSize -= (common.HashLength + 2) // uint16 counter } } // If the child does not exist, it's a previously committed node. @@ -566,6 +584,9 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) { } delete(db.dirties, child) db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) + if node.children != nil { + db.childrenSize -= cachedNodeChildrenSize + } } } @@ -584,8 +605,9 @@ func (db *Database) Cap(limit common.StorageSize) error { // db.dirtiesSize only contains the useful data in the cache, but when reporting // the total memory consumption, the maintenance metadata is also needed to be - // counted. For every useful node, we track 2 extra hashes as the flushlist. - size := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*2*common.HashLength) + // counted. + size := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize) + size += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2)) // If the preimage cache got large enough, push to disk. If it's still small // leave for later to deduplicate writes. @@ -621,10 +643,12 @@ func (db *Database) Cap(limit common.StorageSize) error { batch.Reset() } // Iterate to the next flush item, or abort if the size cap was achieved. Size - // is the total size, including both the useful cached data (hash -> blob), as - // well as the flushlist metadata (2*hash). When flushing items from the cache, - // we need to reduce both. - size -= common.StorageSize(3*common.HashLength + int(node.size)) + // is the total size, including the useful cached data (hash -> blob), the + // cache item metadata, as well as external children mappings. + size -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize) + if node.children != nil { + size -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) + } oldest = node.flushNext } // Flush out any remainder data from the last batch @@ -646,6 +670,9 @@ func (db *Database) Cap(limit common.StorageSize) error { db.oldest = node.flushNext db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) + if node.children != nil { + db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) + } } if db.oldest != (common.Hash{}) { db.dirties[db.oldest].flushPrev = common.Hash{} @@ -803,7 +830,9 @@ func (c *cleaner) Put(key []byte, rlp []byte) error { // Remove the node from the dirty cache delete(c.db.dirties, hash) c.db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - + if node.children != nil { + c.db.dirtiesSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) + } // Move the flushed node into the clean cache to prevent insta-reloads if c.db.cleans != nil { c.db.cleans.Set(string(hash[:]), rlp) @@ -823,9 +852,10 @@ func (db *Database) Size() (common.StorageSize, common.StorageSize) { // db.dirtiesSize only contains the useful data in the cache, but when reporting // the total memory consumption, the maintenance metadata is also needed to be - // counted. For every useful node, we track 2 extra hashes as the flushlist. - var flushlistSize = common.StorageSize((len(db.dirties) - 1) * 2 * common.HashLength) - return db.dirtiesSize + flushlistSize, db.preimagesSize + // counted. + var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize) + var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2)) + return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, db.preimagesSize } // verifyIntegrity is a debug method to iterate over the entire trie stored in From d9d60a5a7f622acd9233d075f009a91a84b62c78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 12 Apr 2019 12:06:43 +0300 Subject: [PATCH 2/2] cmd: special case default cache allowance (4GB mainnet, 128MB ligh) --- cmd/geth/main.go | 14 ++++++++++++++ cmd/utils/flags.go | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 4f3849a41..c439e36fc 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -234,6 +234,20 @@ func init() { if err := debug.Setup(ctx, logdir); err != nil { return err } + // If we're a full node on mainnet without --cache specified, bump default cache allowance + if ctx.GlobalString(utils.SyncModeFlag.Name) != "light" && !ctx.GlobalIsSet(utils.CacheFlag.Name) && !ctx.GlobalIsSet(utils.NetworkIdFlag.Name) { + // Make sure we're not on any supported preconfigured testnet either + if !ctx.GlobalIsSet(utils.TestnetFlag.Name) && !ctx.GlobalIsSet(utils.RinkebyFlag.Name) && !ctx.GlobalIsSet(utils.GoerliFlag.Name) { + // Nope, we're really on mainnet. Bump that cache up! + log.Info("Bumping default cache on mainnet", "provided", ctx.GlobalInt(utils.CacheFlag.Name), "updated", 4096) + ctx.GlobalSet(utils.CacheFlag.Name, strconv.Itoa(4096)) + } + } + // If we're running a light client on any network, drop the cache to some meaningfully low amount + if ctx.GlobalString(utils.SyncModeFlag.Name) == "light" && !ctx.GlobalIsSet(utils.CacheFlag.Name) { + log.Info("Dropping default light client cache", "provided", ctx.GlobalInt(utils.CacheFlag.Name), "updated", 128) + ctx.GlobalSet(utils.CacheFlag.Name, strconv.Itoa(128)) + } // Cap the cache allowance and tune the garbage collector var mem gosigar.Mem if err := mem.Get(); err == nil { diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index f5f4cde5b..689202c69 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -331,7 +331,7 @@ var ( // Performance tuning settings CacheFlag = cli.IntFlag{ Name: "cache", - Usage: "Megabytes of memory allocated to internal caching", + Usage: "Megabytes of memory allocated to internal caching (default = 4096 mainnet full node, 128 light mode)", Value: 1024, } CacheDatabaseFlag = cli.IntFlag{