core/state/snapshot: faster account iteration, CLI integration

2020-01-19 20:57:56 +01:00 · 2020-01-19 20:57:56 +01:00 · 19099421dc
commit 19099421dc
parent 6ddb92a089
9 changed files with 69 additions and 38 deletions
--- a/cmd/geth/chaincmd.go
+++ b/cmd/geth/chaincmd.go
@ -79,6 +79,7 @@ The dumpgenesis command dumps the genesis block configuration in JSON format to
 			utils.CacheFlag,
 			utils.SyncModeFlag,
 			utils.GCModeFlag,
+			utils.SnapshotFlag,
 			utils.CacheDatabaseFlag,
 			utils.CacheGCFlag,
 		},
--- a/cmd/geth/main.go
+++ b/cmd/geth/main.go
@ -91,6 +91,7 @@ var (
 		utils.SyncModeFlag,
 		utils.ExitWhenSyncedFlag,
 		utils.GCModeFlag,
+		utils.SnapshotFlag,
 		utils.LightServeFlag,
 		utils.LightLegacyServFlag,
 		utils.LightIngressFlag,
--- a/cmd/utils/flags.go
+++ b/cmd/utils/flags.go
@ -225,6 +225,10 @@ var (
 		Usage: `Blockchain garbage collection mode ("full", "archive")`,
 		Value: "full",
 	}
+	SnapshotFlag = cli.BoolFlag{
+		Name:  "snapshot",
+		Usage: `Enables snapshot-database mode -- experimental work in progress feature`,
+	}
 	LightKDFFlag = cli.BoolFlag{
 		Name:  "lightkdf",
 		Usage: "Reduce key-derivation RAM & CPU usage at some expense of KDF strength",
@ -1471,6 +1475,9 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *eth.Config) {
 	if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheSnapshotFlag.Name) {
 		cfg.SnapshotCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheSnapshotFlag.Name) / 100
 	}
+	if !ctx.GlobalIsSet(SnapshotFlag.Name) {
+		cfg.SnapshotCache = 0 // Disabled
+	}
 	if ctx.GlobalIsSet(DocRootFlag.Name) {
 		cfg.DocRoot = ctx.GlobalString(DocRootFlag.Name)
 	}
@ -1734,6 +1741,9 @@ func MakeChain(ctx *cli.Context, stack *node.Node) (chain *core.BlockChain, chai
 		TrieTimeLimit:       eth.DefaultConfig.TrieTimeout,
 		SnapshotLimit:       eth.DefaultConfig.SnapshotCache,
 	}
+	if !ctx.GlobalIsSet(SnapshotFlag.Name) {
+		cache.SnapshotLimit = 0 // Disabled
+	}
 	if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheTrieFlag.Name) {
 		cache.TrieCleanLimit = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheTrieFlag.Name) / 100
 	}
--- a/core/blockchain.go
+++ b/core/blockchain.go
@ -302,8 +302,9 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
 		}
 	}
 	// Load any existing snapshot, regenerating it if loading failed
-	bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root())
-
+	if bc.cacheConfig.SnapshotLimit > 0 {
+		bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root())
+	}
 	// Take ownership of this particular state
 	go bc.update()
 	return bc, nil
@ -498,8 +499,9 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error {
 	bc.chainmu.Unlock()

 	// Destroy any existing state snapshot and regenerate it in the background
-	bc.snaps.Rebuild(block.Root())
-
+	if bc.snaps != nil {
+		bc.snaps.Rebuild(block.Root())
+	}
 	log.Info("Committed new head block", "number", block.Number(), "hash", hash)
 	return nil
 }
@ -854,9 +856,12 @@ func (bc *BlockChain) Stop() {
 	bc.wg.Wait()

 	// Ensure that the entirety of the state snapshot is journalled to disk.
-	snapBase, err := bc.snaps.Journal(bc.CurrentBlock().Root())
-	if err != nil {
-		log.Error("Failed to journal state snapshot", "err", err)
+	var snapBase common.Hash
+	if bc.snaps != nil {
+		var err error
+		if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil {
+			log.Error("Failed to journal state snapshot", "err", err)
+		}
 	}
 	// Ensure the state of a recent block is also stored to disk before exiting.
 	// We're writing three different states to catch different restart scenarios:
--- a/core/state/snapshot/difflayer.go
+++ b/core/state/snapshot/difflayer.go
@ -23,6 +23,7 @@ import (
 	"math/rand"
 	"sort"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/ethereum/go-ethereum/common"
@ -92,7 +93,7 @@ type diffLayer struct {
 	memory uint64     // Approximate guess as to how much memory we use

 	root  common.Hash // Root hash to which this snapshot diff belongs to
-	stale bool        // Signals that the layer became stale (state progressed)
+	stale uint32      // Signals that the layer became stale (state progressed)

 	accountList []common.Hash                          // List of account for iteration. If it exists, it's sorted, otherwise it's nil
 	accountData map[common.Hash][]byte                 // Keyed accounts for direct retrival (nil means deleted)
@ -237,10 +238,7 @@ func (dl *diffLayer) Parent() snapshot {
 // Stale return whether this layer has become stale (was flattened across) or if
 // it's still live.
 func (dl *diffLayer) Stale() bool {
-	dl.lock.RLock()
-	defer dl.lock.RUnlock()
-
-	return dl.stale
+	return atomic.LoadUint32(&dl.stale) != 0
 }

 // Account directly retrieves the account associated with a particular hash in
@ -288,7 +286,7 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) {

 	// If the layer was flattened into, consider it invalid (any live reference to
 	// the original should be marked as unusable).
-	if dl.stale {
+	if dl.Stale() {
 		return nil, ErrSnapshotStale
 	}
 	// If the account is known locally, return it. Note, a nil account means it was
@ -342,7 +340,7 @@ func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([

 	// If the layer was flattened into, consider it invalid (any live reference to
 	// the original should be marked as unusable).
-	if dl.stale {
+	if dl.Stale() {
 		return nil, ErrSnapshotStale
 	}
 	// If the account is known locally, try to resolve the slot locally. Note, a nil
@ -401,11 +399,9 @@ func (dl *diffLayer) flatten() snapshot {

 	// Before actually writing all our data to the parent, first ensure that the
 	// parent hasn't been 'corrupted' by someone else already flattening into it
-	if parent.stale {
+	if atomic.SwapUint32(&parent.stale, 1) != 0 {
 		panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo
 	}
-	parent.stale = true
-
 	// Overwrite all the updated accounts blindly, merge the sorted list
 	for hash, data := range dl.accountData {
 		parent.accountData[hash] = data
--- a/core/state/snapshot/iterator.go
+++ b/core/state/snapshot/iterator.go
@ -64,7 +64,7 @@ type diffAccountIterator struct {
 	// is explicitly tracked since the referenced diff layer might go stale after
 	// the iterator was positioned and we don't want to fail accessing the old
 	// value as long as the iterator is not touched any more.
-	curAccount []byte
+	//curAccount []byte

 	layer *diffLayer    // Live layer to retrieve values from
 	keys  []common.Hash // Keys left in the layer to iterate
@ -98,22 +98,13 @@ func (it *diffAccountIterator) Next() bool {
 	if len(it.keys) == 0 {
 		return false
 	}
-	// Iterator seems to be still alive, retrieve and cache the live hash and
-	// account value, or fail now if layer became stale
-	it.layer.lock.RLock()
-	defer it.layer.lock.RUnlock()
-
-	if it.layer.stale {
+	if it.layer.Stale() {
 		it.fail, it.keys = ErrSnapshotStale, nil
 		return false
 	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
 	it.curHash = it.keys[0]
-	if blob, ok := it.layer.accountData[it.curHash]; !ok {
-		panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash))
-	} else {
-		it.curAccount = blob
-	}
-	// Values cached, shift the iterator and notify the user of success
+	// key cached, shift the iterator and notify the user of success
 	it.keys = it.keys[1:]
 	return true
 }
@ -130,8 +121,22 @@ func (it *diffAccountIterator) Hash() common.Hash {
 }

 // Account returns the RLP encoded slim account the iterator is currently at.
+// This method may _fail_ if the underlying layer has been flattened between
+// the call to Next and Account. Such a failure is reported through Error.
+// This method assumes that flattening does not delete elements from
+// the accountData mapping (writing nil into it is fine though), and will panic
+// if elements have been deleted.
 func (it *diffAccountIterator) Account() []byte {
-	return it.curAccount
+	it.layer.lock.RLock()
+	blob, ok := it.layer.accountData[it.curHash]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash))
+	}
+	it.layer.lock.RUnlock()
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+	}
+	return blob
 }

 // Release is a noop for diff account iterators as there are no held resources.
--- a/core/state/snapshot/iterator_fast.go
+++ b/core/state/snapshot/iterator_fast.go
@ -63,8 +63,9 @@ func (its weightedAccountIterators) Swap(i, j int) {
 // fastAccountIterator is a more optimized multi-layer iterator which maintains a
 // direct mapping of all iterators leading down to the bottom layer.
 type fastAccountIterator struct {
-	tree *Tree       // Snapshot tree to reinitialize stale sub-iterators with
-	root common.Hash // Root hash to reinitialize stale sub-iterators through
+	tree       *Tree       // Snapshot tree to reinitialize stale sub-iterators with
+	root       common.Hash // Root hash to reinitialize stale sub-iterators through
+	curAccount []byte

 	iterators weightedAccountIterators
 	initiated bool
@ -160,9 +161,20 @@ func (fi *fastAccountIterator) Next() bool {
 		// Don't forward first time -- we had to 'Next' once in order to
 		// do the sorting already
 		fi.initiated = true
-		return true
+		fi.curAccount = fi.iterators[0].it.Account()
+		if innerErr := fi.iterators[0].it.Error(); innerErr != nil {
+			fi.fail = innerErr
+		}
+		return fi.Error() == nil
 	}
-	return fi.next(0)
+	if !fi.next(0) {
+		return false
+	}
+	fi.curAccount = fi.iterators[0].it.Account()
+	if innerErr := fi.iterators[0].it.Error(); innerErr != nil {
+		fi.fail = innerErr
+	}
+	return fi.Error() == nil
 }

 // next handles the next operation internally and should be invoked when we know
@ -259,7 +271,7 @@ func (fi *fastAccountIterator) Hash() common.Hash {

 // Account returns the current key
 func (fi *fastAccountIterator) Account() []byte {
-	return fi.iterators[0].it.Account()
+	return fi.curAccount
 }

 // Release iterates over all the remaining live layer iterators and releases each
--- a/core/state/snapshot/journal.go
+++ b/core/state/snapshot/journal.go
@ -210,7 +210,7 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
 	dl.lock.RLock()
 	defer dl.lock.RUnlock()

-	if dl.stale {
+	if dl.Stale() {
 		return common.Hash{}, ErrSnapshotStale
 	}
 	// Everything below was journalled, persist this layer too
--- a/core/state/snapshot/snapshot.go
+++ b/core/state/snapshot/snapshot.go
@ -22,6 +22,7 @@ import (
 	"errors"
 	"fmt"
 	"sync"
+	"sync/atomic"

 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
@ -552,7 +553,7 @@ func (t *Tree) Rebuild(root common.Hash) {
 		case *diffLayer:
 			// If the layer is a simple diff, simply mark as stale
 			layer.lock.Lock()
-			layer.stale = true
+			atomic.StoreUint32(&layer.stale, 1)
 			layer.lock.Unlock()

 		default: