From 542df8898ef6d718647058c129069804bc463ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 6 Aug 2019 13:40:28 +0300 Subject: [PATCH 01/28] core: initial version of state snapshots --- accounts/abi/bind/backends/simulated.go | 6 +- cmd/evm/runner.go | 4 +- cmd/geth/chaincmd.go | 2 +- core/blockchain.go | 56 ++-- core/chain_makers.go | 2 +- core/genesis.go | 4 +- core/rawdb/accessors_snapshot.go | 99 +++++++ core/rawdb/database.go | 8 + core/rawdb/schema.go | 23 +- core/state/snapshot/account.go | 54 ++++ core/state/snapshot/difflayer.go | 337 +++++++++++++++++++++++ core/state/snapshot/difflayer_journal.go | 140 ++++++++++ core/state/snapshot/disklayer.go | 115 ++++++++ core/state/snapshot/generate.go | 212 ++++++++++++++ core/state/snapshot/generate_test.go | 111 ++++++++ core/state/snapshot/snapshot.go | 244 ++++++++++++++++ core/state/snapshot/snapshot_test.go | 17 ++ core/state/snapshot/sort.go | 62 +++++ core/state/state_object.go | 59 +++- core/state/statedb.go | 129 +++++++-- core/state_prefetcher.go | 2 + core/vm/runtime/runtime.go | 4 +- core/vm/runtime/runtime_test.go | 4 +- eth/api_test.go | 6 +- eth/api_tracer.go | 6 +- eth/handler_test.go | 2 +- light/odr_test.go | 4 +- light/trie.go | 2 +- tests/state_test_util.go | 4 +- trie/iterator.go | 2 + 30 files changed, 1635 insertions(+), 85 deletions(-) create mode 100644 core/rawdb/accessors_snapshot.go create mode 100644 core/state/snapshot/account.go create mode 100644 core/state/snapshot/difflayer.go create mode 100644 core/state/snapshot/difflayer_journal.go create mode 100644 core/state/snapshot/disklayer.go create mode 100644 core/state/snapshot/generate.go create mode 100644 core/state/snapshot/generate_test.go create mode 100644 core/state/snapshot/snapshot.go create mode 100644 core/state/snapshot/snapshot_test.go create mode 100644 core/state/snapshot/sort.go diff --git a/accounts/abi/bind/backends/simulated.go b/accounts/abi/bind/backends/simulated.go index f7f3dec83..2dbc59356 100644 --- a/accounts/abi/bind/backends/simulated.go +++ b/accounts/abi/bind/backends/simulated.go @@ -124,7 +124,7 @@ func (b *SimulatedBackend) rollback() { statedb, _ := b.blockchain.State() b.pendingBlock = blocks[0] - b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database()) + b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database(), nil) } // stateByBlockNumber retrieves a state by a given blocknumber. 
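Not part of the diff, purely illustrative: the mechanical change repeated across the call sites above (and below) is that state.New now takes a third argument, the optional snapshot tree; passing nil preserves the old trie-only behaviour. A minimal sketch of an adapted caller, assuming only the in-memory helpers that already exist in the tree; the IntermediateRoot call is there just to show the handle is usable.

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/state"
)

func main() {
	// Fresh in-memory state database, no snapshot tree wired up yet.
	db := state.NewDatabase(rawdb.NewMemoryDatabase())

	// The new third parameter is the snapshot tree; nil behaves exactly like
	// the previous two-argument constructor.
	statedb, err := state.New(common.Hash{}, db, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println("empty state root:", statedb.IntermediateRoot(false))
}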
@@ -480,7 +480,7 @@ func (b *SimulatedBackend) SendTransaction(ctx context.Context, tx *types.Transa statedb, _ := b.blockchain.State() b.pendingBlock = blocks[0] - b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database()) + b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database(), nil) return nil } @@ -593,7 +593,7 @@ func (b *SimulatedBackend) AdjustTime(adjustment time.Duration) error { statedb, _ := b.blockchain.State() b.pendingBlock = blocks[0] - b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database()) + b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database(), nil) return nil } diff --git a/cmd/evm/runner.go b/cmd/evm/runner.go index da301ff5e..0a9c19f5b 100644 --- a/cmd/evm/runner.go +++ b/cmd/evm/runner.go @@ -129,10 +129,10 @@ func runCmd(ctx *cli.Context) error { genesisConfig = gen db := rawdb.NewMemoryDatabase() genesis := gen.ToBlock(db) - statedb, _ = state.New(genesis.Root(), state.NewDatabase(db)) + statedb, _ = state.New(genesis.Root(), state.NewDatabase(db), nil) chainConfig = gen.Config } else { - statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) genesisConfig = new(core.Genesis) } if ctx.GlobalString(SenderFlag.Name) != "" { diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 5b176b6da..9d4835a16 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -544,7 +544,7 @@ func dump(ctx *cli.Context) error { fmt.Println("{}") utils.Fatalf("block not found") } else { - state, err := state.New(block.Root(), state.NewDatabase(chainDb)) + state, err := state.New(block.Root(), state.NewDatabase(chainDb), nil) if err != nil { utils.Fatalf("could not create new state: %v", err) } diff --git a/core/blockchain.go b/core/blockchain.go index d7fcbd5e3..676a72c77 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -34,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/ethdb" @@ -61,6 +62,10 @@ var ( storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) + snapshotAccountReadTimer = metrics.NewRegisteredTimer("chain/snapshot/accountreads", nil) + snapshotStorageReadTimer = metrics.NewRegisteredTimer("chain/snapshot/storagereads", nil) + snapshotCommitTimer = metrics.NewRegisteredTimer("chain/snapshot/commits", nil) + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) @@ -135,9 +140,10 @@ type BlockChain struct { chainConfig *params.ChainConfig // Chain & network configuration cacheConfig *CacheConfig // Cache configuration for pruning - db ethdb.Database // Low level persistent database to store final content in - triegc *prque.Prque // Priority queue mapping block numbers to tries to gc - gcproc time.Duration // Accumulates canonical block processing for trie dumping + db ethdb.Database // Low level persistent database to store final content in + snaps *snapshot.SnapshotTree // Snapshot tree for fast trie leaf access + triegc *prque.Prque // 
Priority queue mapping block numbers to tries to gc + gcproc time.Duration // Accumulates canonical block processing for trie dumping hc *HeaderChain rmLogsFeed event.Feed @@ -293,6 +299,11 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } } } + // Load any existing snapshot, regenerating it if loading failed + head := bc.CurrentBlock() + if bc.snaps, err = snapshot.New(bc.db, "snapshot.rlp", head.NumberU64(), head.Root()); err != nil { + return nil, err + } // Take ownership of this particular state go bc.update() return bc, nil @@ -339,7 +350,7 @@ func (bc *BlockChain) loadLastState() error { return bc.Reset() } // Make sure the state associated with the block is available - if _, err := state.New(currentBlock.Root(), bc.stateCache); err != nil { + if _, err := state.New(currentBlock.Root(), bc.stateCache, bc.snaps); err != nil { // Dangling block without a state associated, init from scratch log.Warn("Head state missing, repairing chain", "number", currentBlock.Number(), "hash", currentBlock.Hash()) if err := bc.repair(¤tBlock); err != nil { @@ -401,7 +412,7 @@ func (bc *BlockChain) SetHead(head uint64) error { if newHeadBlock == nil { newHeadBlock = bc.genesisBlock } else { - if _, err := state.New(newHeadBlock.Root(), bc.stateCache); err != nil { + if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil { // Rewound state missing, rolled back to before pivot, reset to genesis newHeadBlock = bc.genesisBlock } @@ -524,7 +535,7 @@ func (bc *BlockChain) State() (*state.StateDB, error) { // StateAt returns a new mutable state based on a particular point in time. func (bc *BlockChain) StateAt(root common.Hash) (*state.StateDB, error) { - return state.New(root, bc.stateCache) + return state.New(root, bc.stateCache, bc.snaps) } // StateCache returns the caching database underpinning the blockchain instance. @@ -576,7 +587,7 @@ func (bc *BlockChain) ResetWithGenesisBlock(genesis *types.Block) error { func (bc *BlockChain) repair(head **types.Block) error { for { // Abort if we've rewound to a head block that does have associated state - if _, err := state.New((*head).Root(), bc.stateCache); err == nil { + if _, err := state.New((*head).Root(), bc.stateCache, bc.snaps); err == nil { log.Info("Rewound blockchain to past state", "number", (*head).Number(), "hash", (*head).Hash()) return nil } @@ -839,6 +850,10 @@ func (bc *BlockChain) Stop() { bc.wg.Wait() + // Ensure that the entirety of the state snapshot is journalled to disk. + if err := bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } // Ensure the state of a recent block is also stored to disk before exiting. 
// We're writing three different states to catch different restart scenarios: // - HEAD: So we don't need to reprocess any blocks in the general case @@ -1647,7 +1662,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er if parent == nil { parent = bc.GetHeader(block.ParentHash(), block.NumberU64()-1) } - statedb, err := state.New(parent.Root, bc.stateCache) + statedb, err := state.New(parent.Root, bc.stateCache, bc.snaps) if err != nil { return it.index, err } @@ -1656,9 +1671,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er var followupInterrupt uint32 if !bc.cacheConfig.TrieCleanNoPrefetch { if followup, err := it.peek(); followup != nil && err == nil { - throwaway, _ := state.New(parent.Root, bc.stateCache) + throwaway, _ := state.New(parent.Root, bc.stateCache, bc.snaps) go func(start time.Time, followup *types.Block, throwaway *state.StateDB, interrupt *uint32) { - bc.prefetcher.Prefetch(followup, throwaway, bc.vmConfig, interrupt) + bc.prefetcher.Prefetch(followup, throwaway, bc.vmConfig, &followupInterrupt) blockPrefetchExecuteTimer.Update(time.Since(start)) if atomic.LoadUint32(interrupt) == 1 { @@ -1676,14 +1691,16 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er return it.index, err } // Update the metrics touched during block processing - accountReadTimer.Update(statedb.AccountReads) // Account reads are complete, we can mark them - storageReadTimer.Update(statedb.StorageReads) // Storage reads are complete, we can mark them - accountUpdateTimer.Update(statedb.AccountUpdates) // Account updates are complete, we can mark them - storageUpdateTimer.Update(statedb.StorageUpdates) // Storage updates are complete, we can mark them + accountReadTimer.Update(statedb.AccountReads) // Account reads are complete, we can mark them + storageReadTimer.Update(statedb.StorageReads) // Storage reads are complete, we can mark them + accountUpdateTimer.Update(statedb.AccountUpdates) // Account updates are complete, we can mark them + storageUpdateTimer.Update(statedb.StorageUpdates) // Storage updates are complete, we can mark them + snapshotAccountReadTimer.Update(statedb.SnapshotAccountReads) // Account reads are complete, we can mark them + snapshotStorageReadTimer.Update(statedb.SnapshotStorageReads) // Storage reads are complete, we can mark them triehash := statedb.AccountHashes + statedb.StorageHashes // Save to not double count in validation - trieproc := statedb.AccountReads + statedb.AccountUpdates - trieproc += statedb.StorageReads + statedb.StorageUpdates + trieproc := statedb.SnapshotAccountReads + statedb.AccountReads + statedb.AccountUpdates + trieproc += statedb.SnapshotStorageReads + statedb.StorageReads + statedb.StorageUpdates blockExecutionTimer.Update(time.Since(substart) - trieproc - triehash) @@ -1712,10 +1729,11 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er atomic.StoreUint32(&followupInterrupt, 1) // Update the metrics touched during block commit - accountCommitTimer.Update(statedb.AccountCommits) // Account commits are complete, we can mark them - storageCommitTimer.Update(statedb.StorageCommits) // Storage commits are complete, we can mark them + accountCommitTimer.Update(statedb.AccountCommits) // Account commits are complete, we can mark them + storageCommitTimer.Update(statedb.StorageCommits) // Storage commits are complete, we can mark them + snapshotCommitTimer.Update(statedb.SnapshotCommits) // Snapshot commits are complete, 
we can mark them - blockWriteTimer.Update(time.Since(substart) - statedb.AccountCommits - statedb.StorageCommits) + blockWriteTimer.Update(time.Since(substart) - statedb.AccountCommits - statedb.StorageCommits - statedb.SnapshotCommits) blockInsertTimer.UpdateSince(start) switch status { diff --git a/core/chain_makers.go b/core/chain_makers.go index fc4f7d182..6524087d4 100644 --- a/core/chain_makers.go +++ b/core/chain_makers.go @@ -228,7 +228,7 @@ func GenerateChain(config *params.ChainConfig, parent *types.Block, engine conse return nil, nil } for i := 0; i < n; i++ { - statedb, err := state.New(parent.Root(), state.NewDatabase(db)) + statedb, err := state.New(parent.Root(), state.NewDatabase(db), nil) if err != nil { panic(err) } diff --git a/core/genesis.go b/core/genesis.go index 92e654da8..06d347f73 100644 --- a/core/genesis.go +++ b/core/genesis.go @@ -178,7 +178,7 @@ func SetupGenesisBlockWithOverride(db ethdb.Database, genesis *Genesis, override // We have the genesis block in database(perhaps in ancient database) // but the corresponding state is missing. header := rawdb.ReadHeader(db, stored, 0) - if _, err := state.New(header.Root, state.NewDatabaseWithCache(db, 0)); err != nil { + if _, err := state.New(header.Root, state.NewDatabaseWithCache(db, 0), nil); err != nil { if genesis == nil { genesis = DefaultGenesisBlock() } @@ -259,7 +259,7 @@ func (g *Genesis) ToBlock(db ethdb.Database) *types.Block { if db == nil { db = rawdb.NewMemoryDatabase() } - statedb, _ := state.New(common.Hash{}, state.NewDatabase(db)) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(db), nil) for addr, account := range g.Alloc { statedb.AddBalance(addr, account.Balance) statedb.SetCode(addr, account.Code) diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go new file mode 100644 index 000000000..9989e6b50 --- /dev/null +++ b/core/rawdb/accessors_snapshot.go @@ -0,0 +1,99 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package rawdb + +import ( + "encoding/binary" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// ReadSnapshotBlock retrieves the number and root of the block whose state is +// contained in the persisted snapshot. +func ReadSnapshotBlock(db ethdb.KeyValueReader) (uint64, common.Hash) { + data, _ := db.Get(snapshotBlockKey) + if len(data) != 8+common.HashLength { + return 0, common.Hash{} + } + return binary.BigEndian.Uint64(data[:8]), common.BytesToHash(data[8:]) +} + +// WriteSnapshotBlock stores the number and root of the block whose state is +// contained in the persisted snapshot. 
+func WriteSnapshotBlock(db ethdb.KeyValueWriter, number uint64, root common.Hash) {
+	if err := db.Put(snapshotBlockKey, append(encodeBlockNumber(number), root.Bytes()...)); err != nil {
+		log.Crit("Failed to store snapshot block's number and root", "err", err)
+	}
+}
+
+// DeleteSnapshotBlock deletes the number and hash of the block whose state is
+// contained in the persisted snapshot. Since snapshots are not immutable, this
+// method can be used during updates, so a crash or failure will mark the entire
+// snapshot invalid.
+func DeleteSnapshotBlock(db ethdb.KeyValueWriter) {
+	if err := db.Delete(snapshotBlockKey); err != nil {
+		log.Crit("Failed to remove snapshot block's number and hash", "err", err)
+	}
+}
+
+// ReadAccountSnapshot retrieves the snapshot entry of an account trie leaf.
+func ReadAccountSnapshot(db ethdb.KeyValueReader, hash common.Hash) []byte {
+	data, _ := db.Get(accountSnapshotKey(hash))
+	return data
+}
+
+// WriteAccountSnapshot stores the snapshot entry of an account trie leaf.
+func WriteAccountSnapshot(db ethdb.KeyValueWriter, hash common.Hash, entry []byte) {
+	if err := db.Put(accountSnapshotKey(hash), entry); err != nil {
+		log.Crit("Failed to store account snapshot", "err", err)
+	}
+}
+
+// DeleteAccountSnapshot removes the snapshot entry of an account trie leaf.
+func DeleteAccountSnapshot(db ethdb.KeyValueWriter, hash common.Hash) {
+	if err := db.Delete(accountSnapshotKey(hash)); err != nil {
+		log.Crit("Failed to delete account snapshot", "err", err)
+	}
+}
+
+// ReadStorageSnapshot retrieves the snapshot entry of a storage trie leaf.
+func ReadStorageSnapshot(db ethdb.KeyValueReader, accountHash, storageHash common.Hash) []byte {
+	data, _ := db.Get(storageSnapshotKey(accountHash, storageHash))
+	return data
+}
+
+// WriteStorageSnapshot stores the snapshot entry of a storage trie leaf.
+func WriteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash common.Hash, entry []byte) {
+	if err := db.Put(storageSnapshotKey(accountHash, storageHash), entry); err != nil {
+		log.Crit("Failed to store storage snapshot", "err", err)
+	}
+}
+
+// DeleteStorageSnapshot removes the snapshot entry of a storage trie leaf.
+func DeleteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash common.Hash) {
+	if err := db.Delete(storageSnapshotKey(accountHash, storageHash)); err != nil {
+		log.Crit("Failed to delete storage snapshot", "err", err)
+	}
+}
+
+// IterateStorageSnapshots returns an iterator for walking the entire storage
+// space of a specific account.
+func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash) ethdb.Iterator { + return db.NewIteratorWithPrefix(storageSnapshotsKey(accountHash)) +} diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 838c08435..7abd07359 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -239,6 +239,8 @@ func InspectDatabase(db ethdb.Database) error { hashNumPairing common.StorageSize trieSize common.StorageSize txlookupSize common.StorageSize + accountSnapSize common.StorageSize + storageSnapSize common.StorageSize preimageSize common.StorageSize bloomBitsSize common.StorageSize cliqueSnapsSize common.StorageSize @@ -280,6 +282,10 @@ func InspectDatabase(db ethdb.Database) error { receiptSize += size case bytes.HasPrefix(key, txLookupPrefix) && len(key) == (len(txLookupPrefix)+common.HashLength): txlookupSize += size + case bytes.HasPrefix(key, StateSnapshotPrefix) && len(key) == (len(StateSnapshotPrefix)+common.HashLength): + accountSnapSize += size + case bytes.HasPrefix(key, StateSnapshotPrefix) && len(key) == (len(StateSnapshotPrefix)+2*common.HashLength): + storageSnapSize += size case bytes.HasPrefix(key, preimagePrefix) && len(key) == (len(preimagePrefix)+common.HashLength): preimageSize += size case bytes.HasPrefix(key, bloomBitsPrefix) && len(key) == (len(bloomBitsPrefix)+10+common.HashLength): @@ -331,6 +337,8 @@ func InspectDatabase(db ethdb.Database) error { {"Key-Value store", "Bloombit index", bloomBitsSize.String()}, {"Key-Value store", "Trie nodes", trieSize.String()}, {"Key-Value store", "Trie preimages", preimageSize.String()}, + {"Key-Value store", "Account snapshot", accountSnapSize.String()}, + {"Key-Value store", "Storage snapshot", storageSnapSize.String()}, {"Key-Value store", "Clique snapshots", cliqueSnapsSize.String()}, {"Key-Value store", "Singleton metadata", metadata.String()}, {"Ancient store", "Headers", ancientHeaders.String()}, diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index a44a2c99f..8e611246a 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -41,6 +41,9 @@ var ( // fastTrieProgressKey tracks the number of trie entries imported during fast sync. fastTrieProgressKey = []byte("TrieSync") + // snapshotBlockKey tracks the number and hash of the last snapshot. + snapshotBlockKey = []byte("SnapshotBlock") + // Data item prefixes (use single byte to avoid mixing data types, avoid `i`, used for indexes). 
	headerPrefix       = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header
	headerTDSuffix     = []byte("t") // headerPrefix + num (uint64 big endian) + hash + headerTDSuffix -> td
@@ -50,8 +53,9 @@
	blockBodyPrefix     = []byte("b") // blockBodyPrefix + num (uint64 big endian) + hash -> block body
	blockReceiptsPrefix = []byte("r") // blockReceiptsPrefix + num (uint64 big endian) + hash -> block receipts

-	txLookupPrefix  = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata
-	bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits
+	txLookupPrefix      = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata
+	bloomBitsPrefix     = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits
+	StateSnapshotPrefix = []byte("s") // StateSnapshotPrefix + account hash [+ storage hash] -> account/storage trie value

	preimagePrefix = []byte("secure-key-")      // preimagePrefix + hash -> preimage
	configPrefix   = []byte("ethereum-config-") // config prefix for the db
@@ -145,6 +149,21 @@ func txLookupKey(hash common.Hash) []byte {
	return append(txLookupPrefix, hash.Bytes()...)
}

+// accountSnapshotKey = StateSnapshotPrefix + hash
+func accountSnapshotKey(hash common.Hash) []byte {
+	return append(StateSnapshotPrefix, hash.Bytes()...)
+}
+
+// storageSnapshotKey = StateSnapshotPrefix + account hash + storage hash
+func storageSnapshotKey(accountHash, storageHash common.Hash) []byte {
+	return append(append(StateSnapshotPrefix, accountHash.Bytes()...), storageHash.Bytes()...)
+}
+
+// storageSnapshotsKey = StateSnapshotPrefix + account hash
+func storageSnapshotsKey(accountHash common.Hash) []byte {
+	return append(StateSnapshotPrefix, accountHash.Bytes()...)
+}
+
// bloomBitsKey = bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash
func bloomBitsKey(bit uint, section uint64, hash common.Hash) []byte {
	key := append(append(bloomBitsPrefix, make([]byte, 10)...), hash.Bytes()...)
diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go
new file mode 100644
index 000000000..1068dc2a0
--- /dev/null
+++ b/core/state/snapshot/account.go
@@ -0,0 +1,54 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snapshot
+
+import (
+	"bytes"
+	"math/big"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+)
+
+// Account is a slim version of a state.Account, where the root and code hash
+// are replaced with a nil byte slice for empty accounts.
+type Account struct {
+	Nonce    uint64
+	Balance  *big.Int
+	Root     []byte
+	CodeHash []byte
+}
+
+// AccountRLP converts a state.Account content into a slim snapshot version RLP
+// encoded.
+func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte {
+	slim := Account{
+		Nonce:   nonce,
+		Balance: balance,
+	}
+	if root != emptyRoot {
+		slim.Root = root[:]
+	}
+	if !bytes.Equal(codehash, emptyCode[:]) {
+		slim.CodeHash = codehash
+	}
+	data, err := rlp.EncodeToBytes(slim)
+	if err != nil {
+		panic(err)
+	}
+	return data
+}
diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go
new file mode 100644
index 000000000..f163feb56
--- /dev/null
+++ b/core/state/snapshot/difflayer.go
@@ -0,0 +1,337 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snapshot
+
+import (
+	"fmt"
+	"sort"
+	"sync"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+)
+
+// diffLayer represents a collection of modifications made to a state snapshot
+// after running a block on top. It contains one sorted list for the account trie
+// and one sorted list for each storage trie.
+//
+// The goal of a diff layer is to act as a journal, tracking recent modifications
+// made to the state that have not yet graduated into a semi-immutable state.
+type diffLayer struct {
+	parent snapshot // Parent snapshot modified by this one, never nil
+	memory uint64   // Approximate guess as to how much memory we use
+
+	number uint64      // Block number to which this snapshot diff belongs
+	root   common.Hash // Root hash to which this snapshot diff belongs
+
+	accountList   []common.Hash                          // List of accounts for iteration, might not be sorted yet (lazy)
+	accountSorted bool                                   // Flag whether the account list has already been sorted or not
+	accountData   map[common.Hash][]byte                 // Keyed accounts for direct retrieval (nil means deleted)
+	storageList   map[common.Hash][]common.Hash          // List of storage slots for iterated retrievals, one per account
+	storageSorted map[common.Hash]bool                   // Flag whether the storage slot list has already been sorted or not
+	storageData   map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval, one per account (nil means deleted)
+
+	lock sync.RWMutex
+}
+
+// newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low
+// level persistent database or a hierarchical diff already.
+func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + // Create the new layer with some pre-allocated data segments + dl := &diffLayer{ + parent: parent, + number: number, + root: root, + accountData: accounts, + storageData: storage, + } + // Fill the account hashes and sort them for the iterator + accountList := make([]common.Hash, 0, len(accounts)) + for hash, data := range accounts { + accountList = append(accountList, hash) + dl.memory += uint64(len(data)) + } + sort.Sort(hashes(accountList)) + dl.accountList = accountList + dl.accountSorted = true + + dl.memory += uint64(len(dl.accountList) * common.HashLength) + + // Fill the storage hashes and sort them for the iterator + dl.storageList = make(map[common.Hash][]common.Hash, len(storage)) + dl.storageSorted = make(map[common.Hash]bool, len(storage)) + + for accountHash, slots := range storage { + // If the slots are nil, sanity check that it's a deleted account + if slots == nil { + // Ensure that the account was just marked as deleted + if account, ok := accounts[accountHash]; account != nil || !ok { + panic(fmt.Sprintf("storage in %#x nil, but account conflicts (%#x, exists: %v)", accountHash, account, ok)) + } + // Everything ok, store the deletion mark and continue + dl.storageList[accountHash] = nil + continue + } + // Storage slots are not nil so entire contract was not deleted, ensure the + // account was just updated. + if account, ok := accounts[accountHash]; account == nil || !ok { + log.Error(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) + //panic(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) + } + // Fill the storage hashes for this account and sort them for the iterator + storageList := make([]common.Hash, 0, len(slots)) + for storageHash, data := range slots { + storageList = append(storageList, storageHash) + dl.memory += uint64(len(data)) + } + sort.Sort(hashes(storageList)) + dl.storageList[accountHash] = storageList + dl.storageSorted[accountHash] = true + + dl.memory += uint64(len(storageList) * common.HashLength) + } + dl.memory += uint64(len(dl.storageList) * common.HashLength) + + return dl +} + +// Info returns the block number and root hash for which this snapshot was made. +func (dl *diffLayer) Info() (uint64, common.Hash) { + return dl.number, dl.root +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. +func (dl *diffLayer) Account(hash common.Hash) *Account { + data := dl.AccountRLP(hash) + if len(data) == 0 { // can be both nil and []byte{} + return nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +func (dl *diffLayer) AccountRLP(hash common.Hash) []byte { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the account is known locally, return it. Note, a nil account means it was + // deleted, and is a different notion than an unknown account! + if data, ok := dl.accountData[hash]; ok { + return data + } + // Account unknown to this diff, resolve from parent + return dl.parent.AccountRLP(hash) +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. 
If the slot is unknown to this diff, its parent
+// is consulted.
+func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) []byte {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	// If the account is known locally, try to resolve the slot locally. Note, a nil
+	// account means it was deleted, and is a different notion than an unknown account!
+	if storage, ok := dl.storageData[accountHash]; ok {
+		if storage == nil {
+			return nil
+		}
+		if data, ok := storage[storageHash]; ok {
+			return data
+		}
+	}
+	// Account - or slot within - unknown to this diff, resolve from parent
+	return dl.parent.Storage(accountHash, storageHash)
+}
+
+// Update creates a new layer on top of the existing snapshot diff tree with
+// the specified data items.
+func (dl *diffLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
+	return newDiffLayer(dl, dl.number+1, blockRoot, accounts, storage)
+}
+
+// Cap traverses downwards the diff tree until the number of allowed layers are
+// crossed. All diffs beyond the permitted number are flattened downwards. If
+// the layer limit is reached, memory cap is also enforced (but not before). The
+// block numbers for the disk layer and first diff layer are returned for GC.
+func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) {
+	// Dive until we run out of layers or reach the persistent database
+	if layers > 2 {
+		// If we still have diff layers below, recurse
+		if parent, ok := dl.parent.(*diffLayer); ok {
+			return parent.Cap(layers-1, memory)
+		}
+		// Diff stack too shallow, return block numbers without modifications
+		return dl.parent.(*diskLayer).number, dl.number
+	}
+	// We're out of layers, flatten anything below, stopping if it's the disk or if
+	// the memory limit is not yet exceeded.
+	switch parent := dl.parent.(type) {
+	case *diskLayer:
+		return parent.number, dl.number
+	case *diffLayer:
+		dl.lock.Lock()
+		defer dl.lock.Unlock()
+
+		dl.parent = parent.flatten()
+		if dl.parent.(*diffLayer).memory < memory {
+			diskNumber, _ := parent.parent.Info()
+			return diskNumber, parent.number
+		}
+	default:
+		panic(fmt.Sprintf("unknown data layer: %T", parent))
+	}
+	// If the bottommost layer is larger than our memory cap, persist to disk
+	var (
+		parent = dl.parent.(*diffLayer)
+		base   = parent.parent.(*diskLayer)
+		batch  = base.db.NewBatch()
+	)
+	parent.lock.RLock()
+	defer parent.lock.RUnlock()
+
+	// Start by temporarily deleting the current snapshot block marker. This
+	// ensures that in the case of a crash, the entire snapshot is invalidated.
+	rawdb.DeleteSnapshotBlock(batch)
+
+	// Push all the accounts into the database
+	for hash, data := range parent.accountData {
+		if len(data) > 0 {
+			// Account was updated, push to disk
+			rawdb.WriteAccountSnapshot(batch, hash, data)
+			base.cache.Set(string(hash[:]), data)
+
+			if batch.ValueSize() > ethdb.IdealBatchSize {
+				if err := batch.Write(); err != nil {
+					log.Crit("Failed to write account snapshot", "err", err)
+				}
+				batch.Reset()
+			}
+		} else {
+			// Account was deleted, remove all storage slots too
+			rawdb.DeleteAccountSnapshot(batch, hash)
+			base.cache.Set(string(hash[:]), nil)
+
+			it := rawdb.IterateStorageSnapshots(base.db, hash)
+			for it.Next() {
+				if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
+					batch.Delete(key)
+					base.cache.Delete(string(key[1:]))
+				}
+			}
+			it.Release()
+		}
+	}
+	// Push all the storage slots into the database
+	for accountHash, storage := range parent.storageData {
+		for storageHash, data := range storage {
+			if len(data) > 0 {
+				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
+				base.cache.Set(string(append(accountHash[:], storageHash[:]...)), data)
+			} else {
+				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
+				base.cache.Set(string(append(accountHash[:], storageHash[:]...)), nil)
+			}
+		}
+		if batch.ValueSize() > ethdb.IdealBatchSize {
+			if err := batch.Write(); err != nil {
+				log.Crit("Failed to write storage snapshot", "err", err)
+			}
+			batch.Reset()
+		}
+	}
+	// Update the snapshot block marker and write any remainder data
+	base.number, base.root = parent.number, parent.root
+
+	rawdb.WriteSnapshotBlock(batch, base.number, base.root)
+	if err := batch.Write(); err != nil {
+		log.Crit("Failed to write leftover snapshot", "err", err)
+	}
+	dl.parent = base
+
+	return base.number, dl.number
+}
+
+// flatten pushes all data from this point downwards, flattening everything into
+// a single diff at the bottom. Since usually the lowermost diff is the largest,
+// the flattening builds up from there in reverse.
+func (dl *diffLayer) flatten() snapshot {
+	// If the parent is not diff, we're the first in line, return unmodified
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		return dl
+	}
+	// Parent is a diff, flatten it first (note, apart from weird corner cases,
+	// flatten will realistically only ever merge 1 layer, so there's no need to
+	// be smarter about grouping flattens together).
+	parent = parent.flatten().(*diffLayer)
+
+	// Overwrite all the updated accounts blindly, merge the sorted list
+	for hash, data := range dl.accountData {
+		parent.accountData[hash] = data
+	}
+	parent.accountList = append(parent.accountList, dl.accountList...) // TODO(karalabe): dedup!!
+	parent.accountSorted = false
+
+	// Overwrite all the updated storage slots (individually)
+	for accountHash, storage := range dl.storageData {
+		// If storage didn't exist (or was deleted) in the parent; or if the storage
+		// was freshly deleted in the child, overwrite blindly
+		if parent.storageData[accountHash] == nil || storage == nil {
+			parent.storageList[accountHash] = dl.storageList[accountHash]
+			parent.storageData[accountHash] = storage
+			continue
+		}
+		// Storage exists in both parent and child, merge the slots
+		comboData := parent.storageData[accountHash]
+		for storageHash, data := range storage {
+			comboData[storageHash] = data
+		}
+		parent.storageData[accountHash] = comboData
+		parent.storageList[accountHash] = append(parent.storageList[accountHash], dl.storageList[accountHash]...) // TODO(karalabe): dedup!!
+		parent.storageSorted[accountHash] = false
+	}
+	// Return the combo parent
+	parent.number = dl.number
+	parent.root = dl.root
+	parent.memory += dl.memory
+	return parent
+}
+
+// Journal commits an entire diff hierarchy to disk into a single journal file.
+// This is meant to be used during shutdown to persist the snapshot without
+// flattening everything down (bad for reorgs).
+func (dl *diffLayer) Journal() error {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	writer, err := dl.journal()
+	if err != nil {
+		return err
+	}
+	writer.Close()
+	return nil
+}
diff --git a/core/state/snapshot/difflayer_journal.go b/core/state/snapshot/difflayer_journal.go
new file mode 100644
index 000000000..844ee8859
--- /dev/null
+++ b/core/state/snapshot/difflayer_journal.go
@@ -0,0 +1,140 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snapshot
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+)
+
+// journalAccount is an account entry in a diffLayer's disk journal.
+type journalAccount struct {
+	Hash common.Hash
+	Blob []byte
+}
+
+// journalStorage is an account's storage map in a diffLayer's disk journal.
+type journalStorage struct {
+	Hash common.Hash
+	Keys []common.Hash
+	Vals [][]byte
+}
+
+// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new
+// diff and verifying that it can be linked to the requested parent.
+func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { + // Read the next diff journal entry + var ( + number uint64 + root common.Hash + ) + if err := r.Decode(&number); err != nil { + // The first read may fail with EOF, marking the end of the journal + if err == io.EOF { + return parent, nil + } + return nil, fmt.Errorf("load diff number: %v", err) + } + if err := r.Decode(&root); err != nil { + return nil, fmt.Errorf("load diff root: %v", err) + } + var accounts []journalAccount + if err := r.Decode(&accounts); err != nil { + return nil, fmt.Errorf("load diff accounts: %v", err) + } + accountData := make(map[common.Hash][]byte) + for _, entry := range accounts { + accountData[entry.Hash] = entry.Blob + } + var storage []journalStorage + if err := r.Decode(&storage); err != nil { + return nil, fmt.Errorf("load diff storage: %v", err) + } + storageData := make(map[common.Hash]map[common.Hash][]byte) + for _, entry := range storage { + slots := make(map[common.Hash][]byte) + for i, key := range entry.Keys { + slots[key] = entry.Vals[i] + } + storageData[entry.Hash] = slots + } + // Validate the block number to avoid state corruption + if parent, ok := parent.(*diffLayer); ok { + if number != parent.number+1 { + return nil, fmt.Errorf("snapshot chain broken: block #%d after #%d", number, parent.number) + } + } + return loadDiffLayer(newDiffLayer(parent, number, root, accountData, storageData), r) +} + +// journal is the internal version of Journal that also returns the journal file +// so subsequent layers know where to write to. +func (dl *diffLayer) journal() (io.WriteCloser, error) { + // If we've reached the bottom, open the journal + var writer io.WriteCloser + if parent, ok := dl.parent.(*diskLayer); ok { + file, err := os.Create(parent.journal) + if err != nil { + return nil, err + } + writer = file + } + // If we haven't reached the bottom yet, journal the parent first + if writer == nil { + file, err := dl.parent.(*diffLayer).journal() + if err != nil { + return nil, err + } + writer = file + } + // Everything below was journalled, persist this layer too + if err := rlp.Encode(writer, dl.number); err != nil { + writer.Close() + return nil, err + } + if err := rlp.Encode(writer, dl.root); err != nil { + writer.Close() + return nil, err + } + accounts := make([]journalAccount, 0, len(dl.accountData)) + for hash, blob := range dl.accountData { + accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(writer, accounts); err != nil { + writer.Close() + return nil, err + } + storage := make([]journalStorage, 0, len(dl.storageData)) + for hash, slots := range dl.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) + } + if err := rlp.Encode(writer, storage); err != nil { + writer.Close() + return nil, err + } + return writer, nil +} diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go new file mode 100644 index 000000000..0406d298f --- /dev/null +++ b/core/state/snapshot/disklayer.go @@ -0,0 +1,115 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" +) + +// diskLayer is a low level persistent snapshot built on top of a key-value store. +type diskLayer struct { + journal string // Path of the snapshot journal to use on shutdown + db ethdb.KeyValueStore // Key-value store containing the base snapshot + cache *bigcache.BigCache // Cache to avoid hitting the disk for direct access + + number uint64 // Block number of the base snapshot + root common.Hash // Root hash of the base snapshot +} + +// Info returns the block number and root hash for which this snapshot was made. +func (dl *diskLayer) Info() (uint64, common.Hash) { + return dl.number, dl.root +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. +func (dl *diskLayer) Account(hash common.Hash) *Account { + data := dl.AccountRLP(hash) + if len(data) == 0 { // can be both nil and []byte{} + return nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +func (dl *diskLayer) AccountRLP(hash common.Hash) []byte { + key := string(hash[:]) + + // Try to retrieve the account from the memory cache + if blob, err := dl.cache.Get(key); err == nil { + snapshotCleanHitMeter.Mark(1) + snapshotCleanReadMeter.Mark(int64(len(blob))) + return blob + } + // Cache doesn't contain account, pull from disk and cache for later + blob := rawdb.ReadAccountSnapshot(dl.db, hash) + dl.cache.Set(key, blob) + + snapshotCleanMissMeter.Mark(1) + snapshotCleanWriteMeter.Mark(int64(len(blob))) + + return blob +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. +func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) []byte { + key := string(append(accountHash[:], storageHash[:]...)) + + // Try to retrieve the storage slot from the memory cache + if blob, err := dl.cache.Get(key); err == nil { + snapshotCleanHitMeter.Mark(1) + snapshotCleanReadMeter.Mark(int64(len(blob))) + return blob + } + // Cache doesn't contain storage slot, pull from disk and cache for later + blob := rawdb.ReadStorageSnapshot(dl.db, accountHash, storageHash) + dl.cache.Set(key, blob) + + snapshotCleanMissMeter.Mark(1) + snapshotCleanWriteMeter.Mark(int64(len(blob))) + + return blob +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. Note, the maps are retained by the method to avoid +// copying everything. 
+func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, dl.number+1, blockHash, accounts, storage) +} + +// Cap traverses downwards the diff tree until the number of allowed layers are +// crossed. All diffs beyond the permitted number are flattened downwards. +func (dl *diskLayer) Cap(layers int, memory uint64) (uint64, uint64) { + return dl.number, dl.number +} + +// Journal commits an entire diff hierarchy to disk into a single journal file. +func (dl *diskLayer) Journal() error { + // There's no journalling a disk layer + return nil +} diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go new file mode 100644 index 000000000..0d451fe50 --- /dev/null +++ b/core/state/snapshot/generate.go @@ -0,0 +1,212 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "fmt" + "math/big" + "time" + + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +var ( + // emptyRoot is the known root hash of an empty trie. + emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") + + // emptyCode is the known hash of the empty EVM bytecode. + emptyCode = crypto.Keccak256Hash(nil) +) + +// wipeSnapshot iterates over the entire key-value database and deletes all the +// data associated with the snapshot (accounts, storage, metadata). After all is +// done, the snapshot range of the database is compacted to free up unused data +// blocks. 
+func wipeSnapshot(db ethdb.KeyValueStore) error {
+	// Batch deletions together to avoid holding an iterator for too long
+	var (
+		batch = db.NewBatch()
+		items int
+	)
+	// Iterate over the snapshot key-range and delete all of them
+	log.Info("Deleting previous snapshot leftovers")
+	start, logged := time.Now(), time.Now()
+
+	it := db.NewIteratorWithStart(rawdb.StateSnapshotPrefix)
+	for it.Next() {
+		// Skip any keys with the correct prefix but wrong length (trie nodes)
+		key := it.Key()
+		if !bytes.HasPrefix(key, rawdb.StateSnapshotPrefix) {
+			break
+		}
+		if len(key) != len(rawdb.StateSnapshotPrefix)+common.HashLength && len(key) != len(rawdb.StateSnapshotPrefix)+2*common.HashLength {
+			continue
+		}
+		// Delete the key and periodically recreate the batch and iterator
+		batch.Delete(key)
+		items++
+
+		if items%10000 == 0 {
+			// Batch too large (or iterator too long lived), flush and recreate
+			it.Release()
+			if err := batch.Write(); err != nil {
+				return err
+			}
+			batch.Reset()
+			it = db.NewIteratorWithStart(key)
+
+			if time.Since(logged) > 8*time.Second {
+				log.Info("Deleting previous snapshot leftovers", "wiped", items, "elapsed", time.Since(start))
+				logged = time.Now()
+			}
+		}
+	}
+	it.Release()
+
+	rawdb.DeleteSnapshotBlock(batch)
+	if err := batch.Write(); err != nil {
+		return err
+	}
+	log.Info("Deleted previous snapshot leftovers", "wiped", items, "elapsed", time.Since(start))
+
+	// Compact the snapshot section of the database to get rid of unused space
+	log.Info("Compacting snapshot area in database")
+	start = time.Now()
+
+	end := common.CopyBytes(rawdb.StateSnapshotPrefix)
+	end[len(end)-1]++
+
+	if err := db.Compact(rawdb.StateSnapshotPrefix, end); err != nil {
+		return err
+	}
+	log.Info("Compacted snapshot area in database", "elapsed", time.Since(start))
+
+	return nil
+}
+
+// generateSnapshot regenerates a brand new snapshot based on an existing state
+// database and head block.
+func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (snapshot, error) { + // Wipe any previously existing snapshot from the database + if err := wipeSnapshot(db); err != nil { + return nil, err + } + // Iterate the entire storage trie and re-generate the state snapshot + var ( + accountCount int + storageCount int + storageNodes int + accountSize common.StorageSize + storageSize common.StorageSize + logged time.Time + ) + batch := db.NewBatch() + triedb := trie.NewDatabase(db) + + accTrie, err := trie.NewSecure(headRoot, triedb) + if err != nil { + return nil, err + } + accIt := trie.NewIterator(accTrie.NodeIterator(nil)) + for accIt.Next() { + var ( + curStorageCount int + curStorageNodes int + curAccountSize common.StorageSize + curStorageSize common.StorageSize + ) + var acc struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + CodeHash []byte + } + if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil { + return nil, err + } + data := AccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + curAccountSize += common.StorageSize(1 + common.HashLength + len(data)) + + rawdb.WriteAccountSnapshot(batch, common.BytesToHash(accIt.Key), data) + if batch.ValueSize() > ethdb.IdealBatchSize { + batch.Write() + batch.Reset() + } + if acc.Root != emptyRoot { + storeTrie, err := trie.NewSecure(acc.Root, triedb) + if err != nil { + return nil, err + } + storeIt := trie.NewIterator(storeTrie.NodeIterator(nil)) + for storeIt.Next() { + curStorageSize += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) + curStorageCount++ + + rawdb.WriteStorageSnapshot(batch, common.BytesToHash(accIt.Key), common.BytesToHash(storeIt.Key), storeIt.Value) + if batch.ValueSize() > ethdb.IdealBatchSize { + batch.Write() + batch.Reset() + } + } + curStorageNodes = storeIt.Nodes + } + accountCount++ + storageCount += curStorageCount + accountSize += curAccountSize + storageSize += curStorageSize + storageNodes += curStorageNodes + + if time.Since(logged) > 8*time.Second { + fmt.Printf("%#x: %9s + %9s (%6d slots, %6d nodes), total %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accIt.Key, curAccountSize.TerminalString(), curStorageSize.TerminalString(), curStorageCount, curStorageNodes, accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) + logged = time.Now() + } + } + fmt.Printf("Totals: %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) + + // Update the snapshot block marker and write any remainder data + rawdb.WriteSnapshotBlock(batch, headNumber, headRoot) + batch.Write() + batch.Reset() + + // Compact the snapshot section of the database to get rid of unused space + log.Info("Compacting snapshot in chain database") + if err := db.Compact([]byte{'s'}, []byte{'s' + 1}); err != nil { + return nil, err + } + // New snapshot generated, construct a brand new base layer + cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup + Shards: 1024, + LifeWindow: time.Hour, + MaxEntriesInWindow: 512 * 1024, + MaxEntrySize: 512, + HardMaxCacheSize: 512, + }) + return &diskLayer{ + journal: journal, + db: db, + cache: cache, + number: headNumber, + root: headRoot, + }, nil +} diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go new file mode 100644 index 000000000..1206445c5 --- /dev/null +++ 
b/core/state/snapshot/generate_test.go
@@ -0,0 +1,111 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snapshot
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+)
+
+// randomHash generates a random blob of data and returns it as a hash.
+func randomHash() common.Hash {
+	var hash common.Hash
+	if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil {
+		panic(err)
+	}
+	return hash
+}
+
+// Tests that given a database with random data content, all parts of a snapshot
+// can be correctly wiped without touching anything else.
+func TestWipe(t *testing.T) {
+	// Create a database with some random snapshot data
+	db := memorydb.New()
+
+	for i := 0; i < 128; i++ {
+		account := randomHash()
+		rawdb.WriteAccountSnapshot(db, account, randomHash().Bytes())
+		for j := 0; j < 1024; j++ {
+			rawdb.WriteStorageSnapshot(db, account, randomHash(), randomHash().Bytes())
+		}
+	}
+	rawdb.WriteSnapshotBlock(db, 123, randomHash())
+
+	// Add some random non-snapshot data too to make wiping harder
+	for i := 0; i < 65536; i++ {
+		// Generate a key that's the wrong length for a state snapshot item
+		var keysize int
+		for keysize == 0 || keysize == 32 || keysize == 64 {
+			keysize = 8 + rand.Intn(64) // +8 to ensure we will "never" randomize duplicates
+		}
+		// Randomize the suffix, dedup and inject it under the snapshot namespace
+		keysuffix := make([]byte, keysize)
+		rand.Read(keysuffix)
+		db.Put(append(rawdb.StateSnapshotPrefix, keysuffix...), randomHash().Bytes())
+	}
+	// Sanity check that all the keys are present
+	var items int
+
+	it := db.NewIteratorWithPrefix(rawdb.StateSnapshotPrefix)
+	defer it.Release()
+
+	for it.Next() {
+		key := it.Key()
+		if len(key) == len(rawdb.StateSnapshotPrefix)+32 || len(key) == len(rawdb.StateSnapshotPrefix)+64 {
+			items++
+		}
+	}
+	if items != 128+128*1024 {
+		t.Fatalf("snapshot size mismatch: have %d, want %d", items, 128+128*1024)
+	}
+	if number, hash := rawdb.ReadSnapshotBlock(db); number != 123 || hash == (common.Hash{}) {
+		t.Errorf("snapshot block marker mismatch: have #%d [%#x], want #%d []", number, hash, 123)
+	}
+	// Wipe all snapshot entries from the database
+	if err := wipeSnapshot(db); err != nil {
+		t.Fatalf("failed to wipe snapshot: %v", err)
+	}
+	// Iterate over the database and ensure no snapshot information remains
+	it = db.NewIteratorWithPrefix(rawdb.StateSnapshotPrefix)
+	defer it.Release()
+
+	for it.Next() {
+		key := it.Key()
+		if len(key) == len(rawdb.StateSnapshotPrefix)+32 || len(key) == len(rawdb.StateSnapshotPrefix)+64 {
+			t.Errorf("snapshot entry remained after wipe: %x", key)
+		}
+	}
+	if number, hash := rawdb.ReadSnapshotBlock(db); number != 0 || hash !=
(common.Hash{}) { + t.Errorf("snapshot block marker remained after wipe: #%d [%#x]", number, hash) + } + // Iterate over the database and ensure miscellaneous items are present + items = 0 + + it = db.NewIterator() + defer it.Release() + + for it.Next() { + items++ + } + if items != 65536 { + t.Fatalf("misc item count mismatch: have %d, want %d", items, 65536) + } +} diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go new file mode 100644 index 000000000..6d4df96da --- /dev/null +++ b/core/state/snapshot/snapshot.go @@ -0,0 +1,244 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package snapshot implements a journalled, dynamic state dump. +package snapshot + +import ( + "errors" + "fmt" + "os" + "sync" + "time" + + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" +) + +var ( + snapshotCleanHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/hit", nil) + snapshotCleanMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/miss", nil) + snapshotCleanReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/read", nil) + snapshotCleanWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/write", nil) +) + +// Snapshot represents the functionality supported by a snapshot storage layer. +type Snapshot interface { + // Info returns the block number and root hash for which this snapshot was made. + Info() (uint64, common.Hash) + + // Account directly retrieves the account associated with a particular hash in + // the snapshot slim data format. + Account(hash common.Hash) *Account + + // AccountRLP directly retrieves the account RLP associated with a particular + // hash in the snapshot slim data format. + AccountRLP(hash common.Hash) []byte + + // Storage directly retrieves the storage data associated with a particular hash, + // within a particular account. + Storage(accountHash, storageHash common.Hash) []byte +} + +// snapshot is the internal version of the snapshot data layer that supports some +// additional methods compared to the public API. +type snapshot interface { + Snapshot + + // Update creates a new layer on top of the existing snapshot diff tree with + // the specified data items. Note, the maps are retained by the method to avoid + // copying everything. + Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer + + // Cap traverses downwards the diff tree until the number of allowed layers are + // crossed. All diffs beyond the permitted number are flattened downwards. 
The + // block numbers for the disk layer and first diff layer are returned for GC. + Cap(layers int, memory uint64) (uint64, uint64) + + // Journal commits an entire diff hierarchy to disk into a single journal file. + // This is meant to be used during shutdown to persist the snapshot without + // flattening everything down (bad for reorgs). + Journal() error +} + +// SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent +// base layer backed by a key-value store, on top of which arbitrarilly many in- +// memory diff layers are topped. The memory diffs can form a tree with branching, +// but the disk layer is singleton and common to all. If a reorg goes deeper than +// the disk layer, everything needs to be deleted. +// +// The goal of a state snapshot is twofold: to allow direct access to account and +// storage data to avoid expensive multi-level trie lookups; and to allow sorted, +// cheap iteration of the account/storage tries for sync aid. +type SnapshotTree struct { + layers map[common.Hash]snapshot // Collection of all known layers // TODO(karalabe): split Clique overlaps + lock sync.RWMutex +} + +// New attempts to load an already existing snapshot from a persistent key-value +// store (with a number of memory layers from a journal), ensuring that the head +// of the snapshot matches the expected one. +// +// If the snapshot is missing or inconsistent, the entirety is deleted and will +// be reconstructed from scratch based on the tries in the key-value store. +func New(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (*SnapshotTree, error) { + // Attempt to load a previously persisted snapshot + head, err := loadSnapshot(db, journal, headNumber, headRoot) + if err != nil { + log.Warn("Failed to load snapshot, regenerating", "err", err) + if head, err = generateSnapshot(db, journal, headNumber, headRoot); err != nil { + return nil, err + } + } + // Existing snapshot loaded or one regenerated, seed all the layers + snap := &SnapshotTree{ + layers: make(map[common.Hash]snapshot), + } + for head != nil { + _, root := head.Info() + snap.layers[root] = head + + switch self := head.(type) { + case *diffLayer: + head = self.parent + case *diskLayer: + head = nil + default: + panic(fmt.Sprintf("unknown data layer: %T", self)) + } + } + return snap, nil +} + +// Snapshot retrieves a snapshot belonging to the given block root, or nil if no +// snapshot is maintained for that block. +func (st *SnapshotTree) Snapshot(blockRoot common.Hash) Snapshot { + st.lock.RLock() + defer st.lock.RUnlock() + + return st.layers[blockRoot] +} + +// Update adds a new snapshot into the tree, if that can be linked to an existing +// old parent. It is disallowed to insert a disk layer (the origin of all). +func (st *SnapshotTree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { + // Generate a new snapshot on top of the parent + parent := st.Snapshot(parentRoot).(snapshot) + if parent == nil { + return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) + } + snap := parent.Update(blockRoot, accounts, storage) + + // Save the new snapshot for later + st.lock.Lock() + defer st.lock.Unlock() + + st.layers[snap.root] = snap + return nil +} + +// Cap traverses downwards the snapshot tree from a head block hash until the +// number of allowed layers are crossed. All layers beyond the permitted number +// are flattened downwards. 
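How callers are expected to drive this API, ahead of the Cap implementation that follows: after every committed block the new state is linked in with Update, the diff hierarchy is bounded with Cap, and Journal persists it at shutdown. The sketch below is illustrative only; maintainSnapshots and its arguments are hypothetical names, while the 16-layer / 4MB bounds mirror the values StateDB.Commit passes to Cap later in this patch.

func maintainSnapshots(snaps *SnapshotTree, parentRoot, blockRoot common.Hash,
	accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
	// Link the freshly committed state as a new diff layer on top of its parent.
	if err := snaps.Update(blockRoot, parentRoot, accounts, storage); err != nil {
		return err
	}
	// Bound the in-memory diff hierarchy: at most 16 layers / ~4MB of diffs,
	// mirroring the defaults used by StateDB.Commit elsewhere in this patch.
	return snaps.Cap(blockRoot, 16, 4*1024*1024)
}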
+func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) error { + // Retrieve the head snapshot to cap from + snap := st.Snapshot(blockRoot).(snapshot) + if snap == nil { + return fmt.Errorf("snapshot [%#x] missing", blockRoot) + } + // Run the internal capping and discard all stale layers + st.lock.Lock() + defer st.lock.Unlock() + + diskNumber, diffNumber := snap.Cap(layers, memory) + for root, snap := range st.layers { + if number, _ := snap.Info(); number != diskNumber && number < diffNumber { + delete(st.layers, root) + } + } + return nil +} + +// Journal commits an entire diff hierarchy to disk into a single journal file. +// This is meant to be used during shutdown to persist the snapshot without +// flattening everything down (bad for reorgs). +func (st *SnapshotTree) Journal(blockRoot common.Hash) error { + // Retrieve the head snapshot to journal from + snap := st.Snapshot(blockRoot).(snapshot) + if snap == nil { + return fmt.Errorf("snapshot [%#x] missing", blockRoot) + } + // Run the journaling + st.lock.Lock() + defer st.lock.Unlock() + + return snap.Journal() +} + +// loadSnapshot loads a pre-existing state snapshot backed by a key-value store. +func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (snapshot, error) { + // Retrieve the block number and hash of the snapshot, failing if no snapshot + // is present in the database (or crashed mid-update). + number, root := rawdb.ReadSnapshotBlock(db) + if root == (common.Hash{}) { + return nil, errors.New("missing or corrupted snapshot") + } + cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup + Shards: 1024, + LifeWindow: time.Hour, + MaxEntriesInWindow: 512 * 1024, + MaxEntrySize: 512, + HardMaxCacheSize: 512, + }) + base := &diskLayer{ + journal: journal, + db: db, + cache: cache, + number: number, + root: root, + } + // Load all the snapshot diffs from the journal, failing if their chain is broken + // or does not lead from the disk snapshot to the specified head. + if _, err := os.Stat(journal); os.IsNotExist(err) { + // Journal doesn't exist, don't worry if it's not supposed to + if number != headNumber || root != headRoot { + return nil, fmt.Errorf("snapshot journal missing, head does't match snapshot: #%d [%#x] vs. #%d [%#x]", + headNumber, headRoot, number, root) + } + return base, nil + } + file, err := os.Open(journal) + if err != nil { + return nil, err + } + snapshot, err := loadDiffLayer(base, rlp.NewStream(file, 0)) + if err != nil { + return nil, err + } + // Entire snapshot journal loaded, sanity check the head and return + // Journal doesn't exist, don't worry if it's not supposed to + number, root = snapshot.Info() + if number != headNumber || root != headRoot { + return nil, fmt.Errorf("head does't match snapshot: #%d [%#x] vs. #%d [%#x]", + headNumber, headRoot, number, root) + } + return snapshot, nil +} diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go new file mode 100644 index 000000000..903bd4a6f --- /dev/null +++ b/core/state/snapshot/snapshot_test.go @@ -0,0 +1,17 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot diff --git a/core/state/snapshot/sort.go b/core/state/snapshot/sort.go new file mode 100644 index 000000000..04729c60b --- /dev/null +++ b/core/state/snapshot/sort.go @@ -0,0 +1,62 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// hashes is a helper to implement sort.Interface. +type hashes []common.Hash + +// Len is the number of elements in the collection. +func (hs hashes) Len() int { return len(hs) } + +// Less reports whether the element with index i should sort before the element +// with index j. +func (hs hashes) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) < 0 } + +// Swap swaps the elements with indexes i and j. +func (hs hashes) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } + +// merge combines two sorted lists of hashes into a combo sorted one. 
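The implementation that follows is a plain two-pointer merge: it assumes both inputs are already sorted and keeps duplicates rather than collapsing them. A small package-internal illustration with throwaway hashes (not part of the patch):

	a := []common.Hash{common.HexToHash("0x01"), common.HexToHash("0x03")}
	b := []common.Hash{common.HexToHash("0x02"), common.HexToHash("0x03")}
	merged := merge(a, b)
	// merged is [0x01, 0x02, 0x03, 0x03]: len(a)+len(b) entries, still sorted,
	// with the duplicate 0x03 preserved rather than deduplicated.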
+func merge(a, b []common.Hash) []common.Hash { + result := make([]common.Hash, len(a)+len(b)) + + i := 0 + for len(a) > 0 && len(b) > 0 { + if bytes.Compare(a[0][:], b[0][:]) < 0 { + result[i] = a[0] + a = a[1:] + } else { + result[i] = b[0] + b = b[1:] + } + i++ + } + for j := 0; j < len(a); j++ { + result[i] = a[j] + i++ + } + for j := 0; j < len(b); j++ { + result[i] = b[j] + i++ + } + return result +} diff --git a/core/state/state_object.go b/core/state/state_object.go index 667d5ec02..98be56671 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -195,15 +195,26 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if value, cached := s.originStorage[key]; cached { return value } - // Track the amount of time wasted on reading the storage trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) - } - // Otherwise load the value from the database - enc, err := s.getTrie(db).TryGet(key[:]) - if err != nil { - s.setError(err) - return common.Hash{} + // If no live objects are available, attempt to use snapshots + var ( + enc []byte + err error + ) + if s.db.snap != nil { + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.SnapshotStorageReads += time.Since(start) }(time.Now()) + } + enc = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key[:])) + } else { + // Track the amount of time wasted on reading the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) + } + // Otherwise load the value from the database + if enc, err = s.getTrie(db).TryGet(key[:]); err != nil { + s.setError(err) + return common.Hash{} + } } var value common.Hash if len(enc) > 0 { @@ -283,6 +294,23 @@ func (s *stateObject) updateTrie(db Database) Trie { if metrics.EnabledExpensive { defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) } + // Retrieve the snapshot storage map for the object + var storage map[common.Hash][]byte + if s.db.snap != nil { + // Retrieve the old storage map, if available + s.db.snapLock.RLock() + storage = s.db.snapStorage[s.addrHash] + s.db.snapLock.RUnlock() + + // If no old storage map was available, create a new one + if storage == nil { + storage = make(map[common.Hash][]byte) + + s.db.snapLock.Lock() + s.db.snapStorage[s.addrHash] = storage + s.db.snapLock.Unlock() + } + } // Insert all the pending updates into the trie tr := s.getTrie(db) for key, value := range s.pendingStorage { @@ -292,13 +320,18 @@ func (s *stateObject) updateTrie(db Database) Trie { } s.originStorage[key] = value + var v []byte if (value == common.Hash{}) { s.setError(tr.TryDelete(key[:])) - continue + } else { + // Encoding []byte cannot fail, ok to ignore the error. + v, _ = rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) + s.setError(tr.TryUpdate(key[:], v)) + } + // If state snapshotting is active, cache the data til commit + if storage != nil { + storage[crypto.Keccak256Hash(key[:])] = v // v will be nil if value is 0x00 } - // Encoding []byte cannot fail, ok to ignore the error. 
- v, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) - s.setError(tr.TryUpdate(key[:], v)) } if len(s.pendingStorage) > 0 { s.pendingStorage = make(Storage) diff --git a/core/state/statedb.go b/core/state/statedb.go index 5d40f59c6..0fb1095ce 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,9 +22,11 @@ import ( "fmt" "math/big" "sort" + "sync" "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" @@ -66,6 +68,12 @@ type StateDB struct { db Database trie Trie + snaps *snapshot.SnapshotTree + snap snapshot.Snapshot + snapAccounts map[common.Hash][]byte + snapStorage map[common.Hash]map[common.Hash][]byte + snapLock sync.RWMutex // Lock for the concurrent storage updaters + // This map holds 'live' objects, which will get modified while processing a state transition. stateObjects map[common.Address]*stateObject stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie @@ -95,32 +103,43 @@ type StateDB struct { nextRevisionId int // Measurements gathered during execution for debugging purposes - AccountReads time.Duration - AccountHashes time.Duration - AccountUpdates time.Duration - AccountCommits time.Duration - StorageReads time.Duration - StorageHashes time.Duration - StorageUpdates time.Duration - StorageCommits time.Duration + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + StorageUpdates time.Duration + StorageCommits time.Duration + SnapshotAccountReads time.Duration + SnapshotStorageReads time.Duration + SnapshotCommits time.Duration } // Create a new state from a given trie. -func New(root common.Hash, db Database) (*StateDB, error) { +func New(root common.Hash, db Database, snaps *snapshot.SnapshotTree) (*StateDB, error) { tr, err := db.OpenTrie(root) if err != nil { return nil, err } - return &StateDB{ + sdb := &StateDB{ db: db, trie: tr, + snaps: snaps, stateObjects: make(map[common.Address]*stateObject), stateObjectsPending: make(map[common.Address]struct{}), stateObjectsDirty: make(map[common.Address]struct{}), logs: make(map[common.Hash][]*types.Log), preimages: make(map[common.Hash][]byte), journal: newJournal(), - }, nil + } + if sdb.snaps != nil { + if sdb.snap = sdb.snaps.Snapshot(root); sdb.snap != nil { + sdb.snapAccounts = make(map[common.Hash][]byte) + sdb.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + } + } + return sdb, nil } // setError remembers the first non-nil error it is called with. 
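Note the signature change rippling through the rest of this patch: state.New now takes a third parameter for the snapshot tree, and callers that do not use snapshots simply pass nil. A hedged sketch of both modes, using a hypothetical openState helper that is not part of the patch:

func openState(root common.Hash, db ethdb.Database, snaps *snapshot.SnapshotTree) (*state.StateDB, error) {
	// Passing snaps == nil disables snapshot acceleration entirely (what most
	// call sites updated in this patch do). Passing a tree only helps if it
	// currently holds a layer for the requested root; otherwise the StateDB
	// falls back to trie lookups as before.
	return state.New(root, state.NewDatabase(db), snaps)
}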
@@ -152,6 +171,14 @@ func (s *StateDB) Reset(root common.Hash) error { s.logSize = 0 s.preimages = make(map[common.Hash][]byte) s.clearJournalAndRefund() + + if s.snaps != nil { + s.snapAccounts, s.snapStorage = nil, nil + if s.snap = s.snaps.Snapshot(root); s.snap != nil { + s.snapAccounts = make(map[common.Hash][]byte) + s.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + } + } return nil } @@ -438,6 +465,11 @@ func (s *StateDB) updateStateObject(obj *stateObject) { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) } s.setError(s.trie.TryUpdate(addr[:], data)) + + // If state snapshotting is active, cache the data til commit + if s.snap != nil { + s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + } } // deleteStateObject removes the given object from the state trie. @@ -449,6 +481,14 @@ func (s *StateDB) deleteStateObject(obj *stateObject) { // Delete the account from the trie addr := obj.Address() s.setError(s.trie.TryDelete(addr[:])) + + // If state snapshotting is active, cache the data til commit + if s.snap != nil { + s.snapLock.Lock() + s.snapAccounts[obj.addrHash] = nil // We need to maintain account deletions explicitly + s.snapStorage[obj.addrHash] = nil // We need to maintain storage deletions explicitly + s.snapLock.Unlock() + } } // getStateObject retrieves a state object given by the address, returning nil if @@ -470,20 +510,38 @@ func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject { if obj := s.stateObjects[addr]; obj != nil { return obj } - // Track the amount of time wasted on loading the object from the database - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) - } - // Load the object from the database - enc, err := s.trie.TryGet(addr[:]) - if len(enc) == 0 { - s.setError(err) - return nil - } + // If no live objects are available, attempt to use snapshots var data Account - if err := rlp.DecodeBytes(enc, &data); err != nil { - log.Error("Failed to decode state object", "addr", addr, "err", err) - return nil + if s.snap != nil { + if metrics.EnabledExpensive { + defer func(start time.Time) { s.SnapshotAccountReads += time.Since(start) }(time.Now()) + } + acc := s.snap.Account(crypto.Keccak256Hash(addr[:])) + if acc == nil { + return nil + } + data.Nonce, data.Balance, data.CodeHash = acc.Nonce, acc.Balance, acc.CodeHash + if len(data.CodeHash) == 0 { + data.CodeHash = emptyCodeHash + } + data.Root = common.BytesToHash(acc.Root) + if data.Root == (common.Hash{}) { + data.Root = emptyRoot + } + } else { + // Snapshot unavailable, fall back to the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) + } + enc, err := s.trie.TryGet(addr[:]) + if len(enc) == 0 { + s.setError(err) + return nil + } + if err := rlp.DecodeBytes(enc, &data); err != nil { + log.Error("Failed to decode state object", "addr", addr, "err", err) + return nil + } } // Insert into the live set obj := newObject(s, addr, data) @@ -748,13 +806,14 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { s.stateObjectsDirty = make(map[common.Address]struct{}) } // Write the account trie changes, measuing the amount of wasted time + var start time.Time if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + start = time.Now() } // The onleaf func is called _serially_, so we can reuse the 
same account // for unmarshalling every time. var account Account - return s.trie.Commit(func(leaf []byte, parent common.Hash) error { + root, err := s.trie.Commit(func(leaf []byte, parent common.Hash) error { if err := rlp.DecodeBytes(leaf, &account); err != nil { return nil } @@ -767,4 +826,22 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } return nil }) + if metrics.EnabledExpensive { + s.AccountCommits += time.Since(start) + } + // If snapshotting is enabled, update the snapshot tree with this new version + if s.snap != nil { + if metrics.EnabledExpensive { + defer func(start time.Time) { s.SnapshotCommits += time.Since(start) }(time.Now()) + } + _, parentRoot := s.snap.Info() + if err := s.snaps.Update(root, parentRoot, s.snapAccounts, s.snapStorage); err != nil { + log.Warn("Failed to update snapshot tree", "from", parentRoot, "to", root, "err", err) + } + if err := s.snaps.Cap(root, 16, 4*1024*1024); err != nil { + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 16, "memory", 4*1024*1024, "err", err) + } + s.snap, s.snapAccounts, s.snapStorage = nil, nil, nil + } + return root, err } diff --git a/core/state_prefetcher.go b/core/state_prefetcher.go index cb85a05b5..bb5db4ced 100644 --- a/core/state_prefetcher.go +++ b/core/state_prefetcher.go @@ -65,6 +65,8 @@ func (p *statePrefetcher) Prefetch(block *types.Block, statedb *state.StateDB, c return // Ugh, something went horribly wrong, bail out } } + // All transactions processed, finalize the block to force loading written-only trie paths + statedb.Finalise(true) // TODO(karalabe): should we run this on interrupt too? } // precacheTransaction attempts to apply a transaction to the given state database diff --git a/core/vm/runtime/runtime.go b/core/vm/runtime/runtime.go index dd5dba66f..9cb492786 100644 --- a/core/vm/runtime/runtime.go +++ b/core/vm/runtime/runtime.go @@ -99,7 +99,7 @@ func Execute(code, input []byte, cfg *Config) ([]byte, *state.StateDB, error) { setDefaults(cfg) if cfg.State == nil { - cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) } var ( address = common.BytesToAddress([]byte("contract")) @@ -129,7 +129,7 @@ func Create(input []byte, cfg *Config) ([]byte, common.Address, uint64, error) { setDefaults(cfg) if cfg.State == nil { - cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) } var ( vmenv = NewEnv(cfg) diff --git a/core/vm/runtime/runtime_test.go b/core/vm/runtime/runtime_test.go index f2d05118c..fb07d69d0 100644 --- a/core/vm/runtime/runtime_test.go +++ b/core/vm/runtime/runtime_test.go @@ -98,7 +98,7 @@ func TestExecute(t *testing.T) { } func TestCall(t *testing.T) { - state, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) address := common.HexToAddress("0x0a") state.SetCode(address, []byte{ byte(vm.PUSH1), 10, @@ -154,7 +154,7 @@ func BenchmarkCall(b *testing.B) { } func benchmarkEVM_Create(bench *testing.B, code string) { var ( - statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) sender = common.BytesToAddress([]byte("sender")) receiver = 
common.BytesToAddress([]byte("receiver")) ) diff --git a/eth/api_test.go b/eth/api_test.go index 1e7c489c3..ab846db3e 100644 --- a/eth/api_test.go +++ b/eth/api_test.go @@ -64,7 +64,7 @@ func (h resultHash) Less(i, j int) bool { return bytes.Compare(h[i].Bytes(), h[j func TestAccountRange(t *testing.T) { var ( statedb = state.NewDatabase(rawdb.NewMemoryDatabase()) - state, _ = state.New(common.Hash{}, statedb) + state, _ = state.New(common.Hash{}, statedb, nil) addrs = [AccountRangeMaxResults * 2]common.Address{} m = map[common.Address]bool{} ) @@ -162,7 +162,7 @@ func TestAccountRange(t *testing.T) { func TestEmptyAccountRange(t *testing.T) { var ( statedb = state.NewDatabase(rawdb.NewMemoryDatabase()) - state, _ = state.New(common.Hash{}, statedb) + state, _ = state.New(common.Hash{}, statedb, nil) ) state.Commit(true) @@ -188,7 +188,7 @@ func TestEmptyAccountRange(t *testing.T) { func TestStorageRangeAt(t *testing.T) { // Create a state where account 0x010000... has a few storage entries. var ( - state, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + state, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) addr = common.Address{0x01} keys = []common.Hash{ // hashes of Keys of storage common.HexToHash("340dd630ad21bf010b4e676dbfa9ba9a02175262d1fa356232cfde6cb5b47ef2"), diff --git a/eth/api_tracer.go b/eth/api_tracer.go index ce211cbd9..560f46044 100644 --- a/eth/api_tracer.go +++ b/eth/api_tracer.go @@ -155,7 +155,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl return nil, fmt.Errorf("parent block #%d not found", number-1) } } - statedb, err := state.New(start.Root(), database) + statedb, err := state.New(start.Root(), database, nil) if err != nil { // If the starting state is missing, allow some number of blocks to be reexecuted reexec := defaultTraceReexec @@ -168,7 +168,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl if start == nil { break } - if statedb, err = state.New(start.Root(), database); err == nil { + if statedb, err = state.New(start.Root(), database, nil); err == nil { break } } @@ -648,7 +648,7 @@ func (api *PrivateDebugAPI) computeStateDB(block *types.Block, reexec uint64) (* if block == nil { break } - if statedb, err = state.New(block.Root(), database); err == nil { + if statedb, err = state.New(block.Root(), database, nil); err == nil { break } } diff --git a/eth/handler_test.go b/eth/handler_test.go index 4a4e1f955..670fd2b14 100644 --- a/eth/handler_test.go +++ b/eth/handler_test.go @@ -349,7 +349,7 @@ func testGetNodeData(t *testing.T, protocol int) { } accounts := []common.Address{testBank, acc1Addr, acc2Addr} for i := uint64(0); i <= pm.blockchain.CurrentBlock().NumberU64(); i++ { - trie, _ := state.New(pm.blockchain.GetBlockByNumber(i).Root(), state.NewDatabase(statedb)) + trie, _ := state.New(pm.blockchain.GetBlockByNumber(i).Root(), state.NewDatabase(statedb), nil) for j, acc := range accounts { state, _ := pm.blockchain.State() diff --git a/light/odr_test.go b/light/odr_test.go index debd5544c..9149c02fc 100644 --- a/light/odr_test.go +++ b/light/odr_test.go @@ -149,7 +149,7 @@ func odrAccounts(ctx context.Context, db ethdb.Database, bc *core.BlockChain, lc st = NewState(ctx, header, lc.Odr()) } else { header := bc.GetHeaderByHash(bhash) - st, _ = state.New(header.Root, state.NewDatabase(db)) + st, _ = state.New(header.Root, state.NewDatabase(db), nil) } var res []byte @@ -189,7 +189,7 @@ func odrContractCall(ctx 
context.Context, db ethdb.Database, bc *core.BlockChain } else { chain = bc header = bc.GetHeaderByHash(bhash) - st, _ = state.New(header.Root, state.NewDatabase(db)) + st, _ = state.New(header.Root, state.NewDatabase(db), nil) } // Perform read-only call. diff --git a/light/trie.go b/light/trie.go index e512bf6f9..0d69e74e2 100644 --- a/light/trie.go +++ b/light/trie.go @@ -30,7 +30,7 @@ import ( ) func NewState(ctx context.Context, head *types.Header, odr OdrBackend) *state.StateDB { - state, _ := state.New(head.Root, NewStateDatabase(ctx, head, odr)) + state, _ := state.New(head.Root, NewStateDatabase(ctx, head, odr), nil) return state } diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 59ebcb6e1..a10d044cd 100644 --- a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -206,7 +206,7 @@ func (t *StateTest) gasLimit(subtest StateSubtest) uint64 { func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB { sdb := state.NewDatabase(db) - statedb, _ := state.New(common.Hash{}, sdb) + statedb, _ := state.New(common.Hash{}, sdb, nil) for addr, a := range accounts { statedb.SetCode(addr, a.Code) statedb.SetNonce(addr, a.Nonce) @@ -217,7 +217,7 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB } // Commit and re-open to start with a clean state. root, _ := statedb.Commit(false) - statedb, _ = state.New(root, sdb) + statedb, _ = state.New(root, sdb, nil) return statedb } diff --git a/trie/iterator.go b/trie/iterator.go index bb4025d8f..88189c542 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -29,6 +29,7 @@ import ( type Iterator struct { nodeIt NodeIterator + Nodes int // Number of nodes iterated over Key []byte // Current data key on which the iterator is positioned on Value []byte // Current data value on which the iterator is positioned on Err error @@ -46,6 +47,7 @@ func NewIterator(it NodeIterator) *Iterator { // Next moves the iterator forward one key-value entry. 
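The Nodes counter added to the leaf iterator here is what feeds the node statistics in the snapshot generator's progress output; the Next change that increments it follows right after. A rough consumption sketch, where accTrie and the logging call are assumed for illustration:

	it := trie.NewIterator(accTrie.NodeIterator(nil))
	leaves := 0
	for it.Next() {
		leaves++ // every iteration lands on a leaf value
	}
	// it.Nodes also counts the interior trie nodes walked to reach those leaves.
	log.Info("Trie traversal finished", "leaves", leaves, "nodes", it.Nodes)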
func (it *Iterator) Next() bool { for it.nodeIt.Next(true) { + it.Nodes++ if it.nodeIt.Leaf() { it.Key = it.nodeIt.LeafKey() it.Value = it.nodeIt.LeafBlob() From e146fbe4e739e5912aadcceb77f9aff803b4a052 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 4 Oct 2019 15:24:01 +0200 Subject: [PATCH 02/28] core/state: lazy sorting, snapshot invalidation --- core/state/snapshot/difflayer.go | 177 ++++++---- core/state/snapshot/difflayer_test.go | 448 ++++++++++++++++++++++++++ core/state/snapshot/disklayer.go | 44 ++- core/state/snapshot/generate.go | 5 +- core/state/snapshot/snapshot.go | 17 +- core/state/snapshot/sort.go | 30 ++ core/state/state_object.go | 8 +- core/state/statedb.go | 34 +- 8 files changed, 671 insertions(+), 92 deletions(-) create mode 100644 core/state/snapshot/difflayer_test.go diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index f163feb56..c7a65e2a4 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -40,13 +40,12 @@ type diffLayer struct { number uint64 // Block number to which this snapshot diff belongs to root common.Hash // Root hash to which this snapshot diff belongs to + stale bool // Signals that the layer became stale (state progressed) - accountList []common.Hash // List of account for iteration, might not be sorted yet (lazy) - accountSorted bool // Flag whether the account list has alreayd been sorted or not - accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) - storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account - storageSorted map[common.Hash]bool // Flag whether the storage slot list has alreayd been sorted or not - storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted) + accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil + accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) + storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted) lock sync.RWMutex } @@ -62,21 +61,13 @@ func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map accountData: accounts, storageData: storage, } - // Fill the account hashes and sort them for the iterator - accountList := make([]common.Hash, 0, len(accounts)) - for hash, data := range accounts { - accountList = append(accountList, hash) + // Determine mem size + for _, data := range accounts { dl.memory += uint64(len(data)) } - sort.Sort(hashes(accountList)) - dl.accountList = accountList - dl.accountSorted = true - - dl.memory += uint64(len(dl.accountList) * common.HashLength) // Fill the storage hashes and sort them for the iterator - dl.storageList = make(map[common.Hash][]common.Hash, len(storage)) - dl.storageSorted = make(map[common.Hash]bool, len(storage)) + dl.storageList = make(map[common.Hash][]common.Hash) for accountHash, slots := range storage { // If the slots are nil, sanity check that it's a deleted account @@ -93,19 +84,11 @@ func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map // account was just updated. 
if account, ok := accounts[accountHash]; account == nil || !ok { log.Error(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) - //panic(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) } - // Fill the storage hashes for this account and sort them for the iterator - storageList := make([]common.Hash, 0, len(slots)) - for storageHash, data := range slots { - storageList = append(storageList, storageHash) + // Determine mem size + for _, data := range slots { dl.memory += uint64(len(data)) } - sort.Sort(hashes(storageList)) - dl.storageList[accountHash] = storageList - dl.storageSorted[accountHash] = true - - dl.memory += uint64(len(storageList) * common.HashLength) } dl.memory += uint64(len(dl.storageList) * common.HashLength) @@ -119,28 +102,36 @@ func (dl *diffLayer) Info() (uint64, common.Hash) { // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. -func (dl *diffLayer) Account(hash common.Hash) *Account { - data := dl.AccountRLP(hash) +func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { + data, err := dl.AccountRLP(hash) + if err != nil { + return nil, err + } if len(data) == 0 { // can be both nil and []byte{} - return nil + return nil, nil } account := new(Account) if err := rlp.DecodeBytes(data, account); err != nil { panic(err) } - return account + return account, nil } // AccountRLP directly retrieves the account RLP associated with a particular // hash in the snapshot slim data format. -func (dl *diffLayer) AccountRLP(hash common.Hash) []byte { +func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { dl.lock.RLock() defer dl.lock.RUnlock() + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } // If the account is known locally, return it. Note, a nil account means it was // deleted, and is a different notion than an unknown account! if data, ok := dl.accountData[hash]; ok { - return data + return data, nil } // Account unknown to this diff, resolve from parent return dl.parent.AccountRLP(hash) @@ -149,18 +140,23 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) []byte { // Storage directly retrieves the storage data associated with a particular hash, // within a particular account. If the slot is unknown to this diff, it's parent // is consulted. -func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) []byte { +func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { dl.lock.RLock() defer dl.lock.RUnlock() + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } // If the account is known locally, try to resolve the slot locally. Note, a nil // account means it was deleted, and is a different notion than an unknown account! if storage, ok := dl.storageData[accountHash]; ok { if storage == nil { - return nil + return nil, nil } if data, ok := storage[storageHash]; ok { - return data + return data, nil } } // Account - or slot within - unknown to this diff, resolve from parent @@ -193,13 +189,17 @@ func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) { case *diskLayer: return parent.number, dl.number case *diffLayer: + // Flatten the parent into the grandparent. 
The flattening internally obtains a + // write lock on grandparent. + flattened := parent.flatten().(*diffLayer) + dl.lock.Lock() defer dl.lock.Unlock() - dl.parent = parent.flatten() - if dl.parent.(*diffLayer).memory < memory { - diskNumber, _ := parent.parent.Info() - return diskNumber, parent.number + dl.parent = flattened + if flattened.memory < memory { + diskNumber, _ := flattened.parent.Info() + return diskNumber, flattened.number } default: panic(fmt.Sprintf("unknown data layer: %T", parent)) @@ -213,10 +213,18 @@ func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) { parent.lock.RLock() defer parent.lock.RUnlock() - // Start by temporarilly deleting the current snapshot block marker. This + // Start by temporarily deleting the current snapshot block marker. This // ensures that in the case of a crash, the entire snapshot is invalidated. rawdb.DeleteSnapshotBlock(batch) + // Mark the original base as stale as we're going to create a new wrapper + base.lock.Lock() + if base.stale { + panic("parent disk layer is stale") // we've committed into the same base from two children, boo + } + base.stale = true + base.lock.Unlock() + // Push all the accounts into the database for hash, data := range parent.accountData { if len(data) > 0 { @@ -264,15 +272,20 @@ func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) { } } // Update the snapshot block marker and write any remainder data - base.number, base.root = parent.number, parent.root - - rawdb.WriteSnapshotBlock(batch, base.number, base.root) + newBase := &diskLayer{ + root: parent.root, + number: parent.number, + cache: base.cache, + db: base.db, + journal: base.journal, + } + rawdb.WriteSnapshotBlock(batch, newBase.number, newBase.root) if err := batch.Write(); err != nil { log.Crit("Failed to write leftover snapshot", "err", err) } - dl.parent = base + dl.parent = newBase - return base.number, dl.number + return newBase.number, dl.number } // flatten pushes all data from this point downwards, flattening everything into @@ -289,19 +302,25 @@ func (dl *diffLayer) flatten() snapshot { // be smarter about grouping flattens together). parent = parent.flatten().(*diffLayer) + parent.lock.Lock() + defer parent.lock.Unlock() + + // Before actually writing all our data to the parent, first ensure that the + // parent hasn't been 'corrupted' by someone else already flattening into it + if parent.stale { + panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo + } + parent.stale = true + // Overwrite all the updated accounts blindly, merge the sorted list for hash, data := range dl.accountData { parent.accountData[hash] = data } - parent.accountList = append(parent.accountList, dl.accountList...) // TODO(karalabe): dedup!! - parent.accountSorted = false - // Overwrite all the updates storage slots (individually) for accountHash, storage := range dl.storageData { // If storage didn't exist (or was deleted) in the parent; or if the storage // was freshly deleted in the child, overwrite blindly if parent.storageData[accountHash] == nil || storage == nil { - parent.storageList[accountHash] = dl.storageList[accountHash] parent.storageData[accountHash] = storage continue } @@ -311,14 +330,18 @@ func (dl *diffLayer) flatten() snapshot { comboData[storageHash] = data } parent.storageData[accountHash] = comboData - parent.storageList[accountHash] = append(parent.storageList[accountHash], dl.storageList[accountHash]...) // TODO(karalabe): dedup!! 
- parent.storageSorted[accountHash] = false } // Return the combo parent - parent.number = dl.number - parent.root = dl.root - parent.memory += dl.memory - return parent + return &diffLayer{ + parent: parent.parent, + number: dl.number, + root: dl.root, + storageList: parent.storageList, + storageData: parent.storageData, + accountList: parent.accountList, + accountData: parent.accountData, + memory: parent.memory + dl.memory, + } } // Journal commits an entire diff hierarchy to disk into a single journal file. @@ -335,3 +358,45 @@ func (dl *diffLayer) Journal() error { writer.Close() return nil } + +// AccountList returns a sorted list of all accounts in this difflayer. +func (dl *diffLayer) AccountList() []common.Hash { + dl.lock.Lock() + defer dl.lock.Unlock() + if dl.accountList != nil { + return dl.accountList + } + accountList := make([]common.Hash, len(dl.accountData)) + i := 0 + for k, _ := range dl.accountData { + accountList[i] = k + i++ + // This would be a pretty good opportunity to also + // calculate the size, if we want to + } + sort.Sort(hashes(accountList)) + dl.accountList = accountList + return dl.accountList +} + +// StorageList returns a sorted list of all storage slot hashes +// in this difflayer for the given account. +func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { + dl.lock.Lock() + defer dl.lock.Unlock() + if dl.storageList[accountHash] != nil { + return dl.storageList[accountHash] + } + accountStorageMap := dl.storageData[accountHash] + accountStorageList := make([]common.Hash, len(accountStorageMap)) + i := 0 + for k, _ := range accountStorageMap { + accountStorageList[i] = k + i++ + // This would be a pretty good opportunity to also + // calculate the size, if we want to + } + sort.Sort(hashes(accountStorageList)) + dl.storageList[accountHash] = accountStorageList + return accountStorageList +} diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go new file mode 100644 index 000000000..5a718c617 --- /dev/null +++ b/core/state/snapshot/difflayer_test.go @@ -0,0 +1,448 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
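The upshot of the lazy-sorting rework in difflayer.go above: newDiffLayer no longer materialises sorted key lists up front; AccountList and StorageList build and cache them on first use, so layers that are never iterated pay nothing. A minimal sketch, where parent, blockRoot and the two maps are placeholders:

	diff := parent.Update(blockRoot, accounts, storage) // no sorting happens here
	for _, hash := range diff.AccountList() { // sorted list is built once, then cached
		_, _ = diff.Account(hash)
	}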
+ +package snapshot + +import ( + "bytes" + "fmt" + "math/big" + "math/rand" + "testing" + "time" + + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/rlp" +) + +func randomAccount() []byte { + root := randomHash() + a := Account{ + Balance: big.NewInt(rand.Int63()), + Nonce: rand.Uint64(), + Root: root[:], + CodeHash: emptyCode[:], + } + data, _ := rlp.EncodeToBytes(a) + return data +} + +// TestMergeBasics tests some simple merges +func TestMergeBasics(t *testing.T) { + var ( + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + for i := 0; i < 100; i++ { + h := randomHash() + data := randomAccount() + + accounts[h] = data + if rand.Intn(20) < 10 { + accStorage := make(map[common.Hash][]byte) + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[h] = accStorage + } + } + // Add some (identical) layers on top + parent := newDiffLayer(emptyLayer{}, 1, common.Hash{}, accounts, storage) + child := newDiffLayer(parent, 1, common.Hash{}, accounts, storage) + child = newDiffLayer(child, 1, common.Hash{}, accounts, storage) + child = newDiffLayer(child, 1, common.Hash{}, accounts, storage) + child = newDiffLayer(child, 1, common.Hash{}, accounts, storage) + // And flatten + merged := (child.flatten()).(*diffLayer) + + { // Check account lists + // Should be zero/nil first + if got, exp := len(merged.accountList), 0; got != exp { + t.Errorf("accountList wrong, got %v exp %v", got, exp) + } + // Then set when we call AccountList + if got, exp := len(merged.AccountList()), len(accounts); got != exp { + t.Errorf("AccountList() wrong, got %v exp %v", got, exp) + } + if got, exp := len(merged.accountList), len(accounts); got != exp { + t.Errorf("accountList [2] wrong, got %v exp %v", got, exp) + } + } + { // Check storage lists + i := 0 + for aHash, sMap := range storage { + if got, exp := len(merged.storageList), i; got != exp { + t.Errorf("[1] storageList wrong, got %v exp %v", got, exp) + } + if got, exp := len(merged.StorageList(aHash)), len(sMap); got != exp { + t.Errorf("[2] StorageList() wrong, got %v exp %v", got, exp) + } + if got, exp := len(merged.storageList[aHash]), len(sMap); got != exp { + t.Errorf("storageList wrong, got %v exp %v", got, exp) + } + i++ + } + } +} + +// TestMergeDelete tests some deletion +func TestMergeDelete(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + h1 := common.HexToHash("0x01") + h2 := common.HexToHash("0x02") + + flip := func() map[common.Hash][]byte { + accs := make(map[common.Hash][]byte) + accs[h1] = randomAccount() + accs[h2] = nil + return accs + } + flop := func() map[common.Hash][]byte { + accs := make(map[common.Hash][]byte) + accs[h1] = nil + accs[h2] = randomAccount() + return accs + } + + // Add some flip-flopping layers on top + parent := newDiffLayer(emptyLayer{}, 1, common.Hash{}, flip(), storage) + child := parent.Update(common.Hash{}, flop(), storage) + child = child.Update(common.Hash{}, flip(), storage) + child = child.Update(common.Hash{}, flop(), storage) + child = child.Update(common.Hash{}, flip(), storage) + child = child.Update(common.Hash{}, flop(), storage) + child = child.Update(common.Hash{}, flip(), storage) + + if data, _ := child.Account(h1); data == nil { + t.Errorf("last diff layer: expected %x to be non-nil", h1) + } + if data, _ := child.Account(h2); 
data != nil { + t.Errorf("last diff layer: expected %x to be nil", h2) + } + // And flatten + merged := (child.flatten()).(*diffLayer) + + // check number + if got, exp := merged.number, child.number; got != exp { + t.Errorf("merged layer: wrong number - exp %d got %d", exp, got) + } + if data, _ := merged.Account(h1); data == nil { + t.Errorf("merged layer: expected %x to be non-nil", h1) + } + if data, _ := merged.Account(h2); data != nil { + t.Errorf("merged layer: expected %x to be nil", h2) + } + // If we add more granular metering of memory, we can enable this again, + // but it's not implemented for now + //if got, exp := merged.memory, child.memory; got != exp { + // t.Errorf("mem wrong, got %d, exp %d", got, exp) + //} +} + +// This tests that if we create a new account, and set a slot, and then merge +// it, the lists will be correct. +func TestInsertAndMerge(t *testing.T) { + // Fill up a parent + var ( + acc = common.HexToHash("0x01") + slot = common.HexToHash("0x02") + parent *diffLayer + child *diffLayer + ) + { + var accounts = make(map[common.Hash][]byte) + var storage = make(map[common.Hash]map[common.Hash][]byte) + parent = newDiffLayer(emptyLayer{}, 1, common.Hash{}, accounts, storage) + } + { + var accounts = make(map[common.Hash][]byte) + var storage = make(map[common.Hash]map[common.Hash][]byte) + accounts[acc] = randomAccount() + accstorage := make(map[common.Hash][]byte) + storage[acc] = accstorage + storage[acc][slot] = []byte{0x01} + child = newDiffLayer(parent, 2, common.Hash{}, accounts, storage) + } + // And flatten + merged := (child.flatten()).(*diffLayer) + { // Check that slot value is present + got, _ := merged.Storage(acc, slot) + if exp := []byte{0x01}; bytes.Compare(got, exp) != 0 { + t.Errorf("merged slot value wrong, got %x, exp %x", got, exp) + } + } +} + +// TestCapTree tests some functionality regarding capping/flattening +func TestCapTree(t *testing.T) { + + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + setAccount := func(accKey string) map[common.Hash][]byte { + return map[common.Hash][]byte{ + common.HexToHash(accKey): randomAccount(), + } + } + // the bottom-most layer, aside from the 'disk layer' + cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup + Shards: 1, + LifeWindow: time.Hour, + MaxEntriesInWindow: 1 * 1024, + MaxEntrySize: 1, + HardMaxCacheSize: 1, + }) + + base := &diskLayer{ + journal: "", + db: rawdb.NewMemoryDatabase(), + cache: cache, + number: 0, + root: common.HexToHash("0x01"), + } + // The lowest difflayer + a1 := base.Update(common.HexToHash("0xa1"), setAccount("0xa1"), storage) + + a2 := a1.Update(common.HexToHash("0xa2"), setAccount("0xa2"), storage) + b2 := a1.Update(common.HexToHash("0xb2"), setAccount("0xb2"), storage) + + a3 := a2.Update(common.HexToHash("0xa3"), setAccount("0xa3"), storage) + b3 := b2.Update(common.HexToHash("0xb3"), setAccount("0xb3"), storage) + + checkExist := func(layer *diffLayer, key string) error { + accountKey := common.HexToHash(key) + data, _ := layer.Account(accountKey) + if data == nil { + return fmt.Errorf("expected %x to exist, got nil", accountKey) + } + return nil + } + shouldErr := func(layer *diffLayer, key string) error { + accountKey := common.HexToHash(key) + data, err := layer.Account(accountKey) + if err == nil { + return fmt.Errorf("expected error, got data %x", data) + } + return nil + } + + // check basics + if err := checkExist(b3, "0xa1"); err != nil { + t.Error(err) + } + if err := checkExist(b3, "0xb2"); err != nil { + 
t.Error(err) + } + if err := checkExist(b3, "0xb3"); err != nil { + t.Error(err) + } + // Now, merge the a-chain + diskNum, diffNum := a3.Cap(0, 1024) + if diskNum != 0 { + t.Errorf("disk layer err, got %d exp %d", diskNum, 0) + } + if diffNum != 2 { + t.Errorf("diff layer err, got %d exp %d", diffNum, 2) + } + // At this point, a2 got merged into a1. Thus, a1 is now modified, + // and as a1 is the parent of b2, b2 should no longer be able to iterate into parent + + // These should still be accessible + if err := checkExist(b3, "0xb2"); err != nil { + t.Error(err) + } + if err := checkExist(b3, "0xb3"); err != nil { + t.Error(err) + } + //b2ParentNum, _ := b2.parent.Info() + //if b2.parent.invalid == false + // t.Errorf("err, exp parent to be invalid, got %v", b2.parent, b2ParentNum) + //} + // But these would need iteration into the modified parent: + if err := shouldErr(b3, "0xa1"); err != nil { + t.Error(err) + } + if err := shouldErr(b3, "0xa2"); err != nil { + t.Error(err) + } + if err := shouldErr(b3, "0xa3"); err != nil { + t.Error(err) + } +} + +type emptyLayer struct{} + +func (emptyLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + panic("implement me") +} + +func (emptyLayer) Cap(layers int, memory uint64) (uint64, uint64) { + panic("implement me") +} + +func (emptyLayer) Journal() error { + panic("implement me") +} + +func (emptyLayer) Info() (uint64, common.Hash) { + return 0, common.Hash{} +} +func (emptyLayer) Number() uint64 { + return 0 +} + +func (emptyLayer) Account(hash common.Hash) (*Account, error) { + return nil, nil +} + +func (emptyLayer) AccountRLP(hash common.Hash) ([]byte, error) { + return nil, nil +} + +func (emptyLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + return nil, nil +} + +// BenchmarkSearch checks how long it takes to find a non-existing key +// BenchmarkSearch-6 200000 10481 ns/op (1K per layer) +// BenchmarkSearch-6 200000 10760 ns/op (10K per layer) +// BenchmarkSearch-6 100000 17866 ns/op +// +// BenchmarkSearch-6 500000 3723 ns/op (10k per layer, only top-level RLock() +func BenchmarkSearch(b *testing.B) { + // First, we set up 128 diff layers, with 1K items each + + blocknum := uint64(0) + fill := func(parent snapshot) *diffLayer { + accounts := make(map[common.Hash][]byte) + storage := make(map[common.Hash]map[common.Hash][]byte) + + for i := 0; i < 10000; i++ { + accounts[randomHash()] = randomAccount() + } + blocknum++ + return newDiffLayer(parent, blocknum, common.Hash{}, accounts, storage) + } + + var layer snapshot + layer = emptyLayer{} + for i := 0; i < 128; i++ { + layer = fill(layer) + } + + key := common.Hash{} + b.ResetTimer() + for i := 0; i < b.N; i++ { + layer.AccountRLP(key) + } +} + +// BenchmarkSearchSlot checks how long it takes to find a non-existing key +// - Number of layers: 128 +// - Each layers contains the account, with a couple of storage slots +// BenchmarkSearchSlot-6 100000 14554 ns/op +// BenchmarkSearchSlot-6 100000 22254 ns/op (when checking parent root using mutex) +// BenchmarkSearchSlot-6 100000 14551 ns/op (when checking parent number using atomic) +func BenchmarkSearchSlot(b *testing.B) { + // First, we set up 128 diff layers, with 1K items each + + blocknum := uint64(0) + accountKey := common.Hash{} + storageKey := common.HexToHash("0x1337") + accountRLP := randomAccount() + fill := func(parent snapshot) *diffLayer { + accounts := make(map[common.Hash][]byte) + accounts[accountKey] = accountRLP + 
storage := make(map[common.Hash]map[common.Hash][]byte) + + accStorage := make(map[common.Hash][]byte) + for i := 0; i < 5; i++ { + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[accountKey] = accStorage + } + blocknum++ + return newDiffLayer(parent, blocknum, common.Hash{}, accounts, storage) + } + + var layer snapshot + layer = emptyLayer{} + for i := 0; i < 128; i++ { + layer = fill(layer) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + layer.Storage(accountKey, storageKey) + } +} + +// With accountList and sorting +//BenchmarkFlatten-6 50 29890856 ns/op +// +// Without sorting and tracking accountlist +// BenchmarkFlatten-6 300 5511511 ns/op +func BenchmarkFlatten(b *testing.B) { + + fill := func(parent snapshot, blocknum int) *diffLayer { + accounts := make(map[common.Hash][]byte) + storage := make(map[common.Hash]map[common.Hash][]byte) + + for i := 0; i < 100; i++ { + accountKey := randomHash() + accounts[accountKey] = randomAccount() + + accStorage := make(map[common.Hash][]byte) + for i := 0; i < 20; i++ { + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + + } + storage[accountKey] = accStorage + } + return newDiffLayer(parent, uint64(blocknum), common.Hash{}, accounts, storage) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + var layer snapshot + layer = emptyLayer{} + for i := 1; i < 128; i++ { + layer = fill(layer, i) + } + b.StartTimer() + + for i := 1; i < 128; i++ { + dl, ok := layer.(*diffLayer) + if !ok { + break + } + + layer = dl.flatten() + } + b.StopTimer() + } +} diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 0406d298f..a9839f01a 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -17,6 +17,8 @@ package snapshot import ( + "sync" + "github.com/allegro/bigcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" @@ -32,6 +34,9 @@ type diskLayer struct { number uint64 // Block number of the base snapshot root common.Hash // Root hash of the base snapshot + stale bool // Signals that the layer became stale (state progressed) + + lock sync.RWMutex } // Info returns the block number and root hash for which this snapshot was made. @@ -41,28 +46,39 @@ func (dl *diskLayer) Info() (uint64, common.Hash) { // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. -func (dl *diskLayer) Account(hash common.Hash) *Account { - data := dl.AccountRLP(hash) +func (dl *diskLayer) Account(hash common.Hash) (*Account, error) { + data, err := dl.AccountRLP(hash) + if err != nil { + return nil, err + } if len(data) == 0 { // can be both nil and []byte{} - return nil + return nil, nil } account := new(Account) if err := rlp.DecodeBytes(data, account); err != nil { panic(err) } - return account + return account, nil } // AccountRLP directly retrieves the account RLP associated with a particular // hash in the snapshot slim data format. -func (dl *diskLayer) AccountRLP(hash common.Hash) []byte { +func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). 
+ if dl.stale { + return nil, ErrSnapshotStale + } key := string(hash[:]) // Try to retrieve the account from the memory cache if blob, err := dl.cache.Get(key); err == nil { snapshotCleanHitMeter.Mark(1) snapshotCleanReadMeter.Mark(int64(len(blob))) - return blob + return blob, nil } // Cache doesn't contain account, pull from disk and cache for later blob := rawdb.ReadAccountSnapshot(dl.db, hash) @@ -71,19 +87,27 @@ func (dl *diskLayer) AccountRLP(hash common.Hash) []byte { snapshotCleanMissMeter.Mark(1) snapshotCleanWriteMeter.Mark(int64(len(blob))) - return blob + return blob, nil } // Storage directly retrieves the storage data associated with a particular hash, // within a particular account. -func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) []byte { +func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } key := string(append(accountHash[:], storageHash[:]...)) // Try to retrieve the storage slot from the memory cache if blob, err := dl.cache.Get(key); err == nil { snapshotCleanHitMeter.Mark(1) snapshotCleanReadMeter.Mark(int64(len(blob))) - return blob + return blob, nil } // Cache doesn't contain storage slot, pull from disk and cache for later blob := rawdb.ReadStorageSnapshot(dl.db, accountHash, storageHash) @@ -92,7 +116,7 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) []byte { snapshotCleanMissMeter.Mark(1) snapshotCleanWriteMeter.Mark(int64(len(blob))) - return blob + return blob, nil } // Update creates a new layer on top of the existing snapshot diff tree with diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 0d451fe50..4a66e0626 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -135,6 +135,7 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, curStorageNodes int curAccountSize common.StorageSize curStorageSize common.StorageSize + accountHash = common.BytesToHash(accIt.Key) ) var acc struct { Nonce uint64 @@ -148,7 +149,7 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, data := AccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) curAccountSize += common.StorageSize(1 + common.HashLength + len(data)) - rawdb.WriteAccountSnapshot(batch, common.BytesToHash(accIt.Key), data) + rawdb.WriteAccountSnapshot(batch, accountHash, data) if batch.ValueSize() > ethdb.IdealBatchSize { batch.Write() batch.Reset() @@ -163,7 +164,7 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, curStorageSize += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) curStorageCount++ - rawdb.WriteStorageSnapshot(batch, common.BytesToHash(accIt.Key), common.BytesToHash(storeIt.Key), storeIt.Value) + rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(storeIt.Key), storeIt.Value) if batch.ValueSize() > ethdb.IdealBatchSize { batch.Write() batch.Reset() diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 6d4df96da..6a21d57dc 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -38,6 +38,11 @@ var ( snapshotCleanMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/miss", nil) snapshotCleanReadMeter = 
metrics.NewRegisteredMeter("state/snapshot/clean/read", nil) snapshotCleanWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/write", nil) + + // ErrSnapshotStale is returned from data accessors if the underlying snapshot + // layer had been invalidated due to the chain progressing forward far enough + // to not maintain the layer's original state. + ErrSnapshotStale = errors.New("snapshot stale") ) // Snapshot represents the functionality supported by a snapshot storage layer. @@ -47,15 +52,15 @@ type Snapshot interface { // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. - Account(hash common.Hash) *Account + Account(hash common.Hash) (*Account, error) // AccountRLP directly retrieves the account RLP associated with a particular // hash in the snapshot slim data format. - AccountRLP(hash common.Hash) []byte + AccountRLP(hash common.Hash) ([]byte, error) // Storage directly retrieves the storage data associated with a particular hash, // within a particular account. - Storage(accountHash, storageHash common.Hash) []byte + Storage(accountHash, storageHash common.Hash) ([]byte, error) } // snapshot is the internal version of the snapshot data layer that supports some @@ -80,7 +85,7 @@ type snapshot interface { } // SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent -// base layer backed by a key-value store, on top of which arbitrarilly many in- +// base layer backed by a key-value store, on top of which arbitrarily many in- // memory diff layers are topped. The memory diffs can form a tree with branching, // but the disk layer is singleton and common to all. If a reorg goes deeper than // the disk layer, everything needs to be deleted. @@ -220,7 +225,7 @@ func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, hea if _, err := os.Stat(journal); os.IsNotExist(err) { // Journal doesn't exist, don't worry if it's not supposed to if number != headNumber || root != headRoot { - return nil, fmt.Errorf("snapshot journal missing, head does't match snapshot: #%d [%#x] vs. #%d [%#x]", + return nil, fmt.Errorf("snapshot journal missing, head doesn't match snapshot: #%d [%#x] vs. #%d [%#x]", headNumber, headRoot, number, root) } return base, nil @@ -237,7 +242,7 @@ func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, hea // Journal doesn't exist, don't worry if it's not supposed to number, root = snapshot.Info() if number != headNumber || root != headRoot { - return nil, fmt.Errorf("head does't match snapshot: #%d [%#x] vs. #%d [%#x]", + return nil, fmt.Errorf("head doesn't match snapshot: #%d [%#x] vs. 
#%d [%#x]", headNumber, headRoot, number, root) } return snapshot, nil diff --git a/core/state/snapshot/sort.go b/core/state/snapshot/sort.go index 04729c60b..ee7cc4990 100644 --- a/core/state/snapshot/sort.go +++ b/core/state/snapshot/sort.go @@ -60,3 +60,33 @@ func merge(a, b []common.Hash) []common.Hash { } return result } + +// dedupMerge combines two sorted lists of hashes into a combo sorted one, +// and removes duplicates in the process +func dedupMerge(a, b []common.Hash) []common.Hash { + result := make([]common.Hash, len(a)+len(b)) + i := 0 + for len(a) > 0 && len(b) > 0 { + if diff := bytes.Compare(a[0][:], b[0][:]); diff < 0 { + result[i] = a[0] + a = a[1:] + } else { + result[i] = b[0] + b = b[1:] + // If they were equal, progress a too + if diff == 0 { + a = a[1:] + } + } + i++ + } + for j := 0; j < len(a); j++ { + result[i] = a[j] + i++ + } + for j := 0; j < len(b); j++ { + result[i] = b[j] + i++ + } + return result[:i] +} diff --git a/core/state/state_object.go b/core/state/state_object.go index 98be56671..d10caa831 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -204,13 +204,13 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if metrics.EnabledExpensive { defer func(start time.Time) { s.db.SnapshotStorageReads += time.Since(start) }(time.Now()) } - enc = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key[:])) - } else { - // Track the amount of time wasted on reading the storage trie + enc, err = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key[:])) + } + // If snapshot unavailable or reading from it failed, load from the database + if s.db.snap == nil || err != nil { if metrics.EnabledExpensive { defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) } - // Otherwise load the value from the database if enc, err = s.getTrie(db).TryGet(key[:]); err != nil { s.setError(err) return common.Hash{} diff --git a/core/state/statedb.go b/core/state/statedb.go index 0fb1095ce..7d7499892 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -511,25 +511,31 @@ func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject { return obj } // If no live objects are available, attempt to use snapshots - var data Account + var ( + data Account + err error + ) if s.snap != nil { if metrics.EnabledExpensive { defer func(start time.Time) { s.SnapshotAccountReads += time.Since(start) }(time.Now()) } - acc := s.snap.Account(crypto.Keccak256Hash(addr[:])) - if acc == nil { - return nil + var acc *snapshot.Account + if acc, err = s.snap.Account(crypto.Keccak256Hash(addr[:])); err == nil { + if acc == nil { + return nil + } + data.Nonce, data.Balance, data.CodeHash = acc.Nonce, acc.Balance, acc.CodeHash + if len(data.CodeHash) == 0 { + data.CodeHash = emptyCodeHash + } + data.Root = common.BytesToHash(acc.Root) + if data.Root == (common.Hash{}) { + data.Root = emptyRoot + } } - data.Nonce, data.Balance, data.CodeHash = acc.Nonce, acc.Balance, acc.CodeHash - if len(data.CodeHash) == 0 { - data.CodeHash = emptyCodeHash - } - data.Root = common.BytesToHash(acc.Root) - if data.Root == (common.Hash{}) { - data.Root = emptyRoot - } - } else { - // Snapshot unavailable, fall back to the trie + } + // If snapshot unavailable or reading from it failed, load from the database + if s.snap == nil || err != nil { if metrics.EnabledExpensive { defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) } From d7d81d7c1255d613400e833d634579129c73f8de Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Thu, 17 Oct 2019 18:30:31 +0300 Subject: [PATCH 03/28] core/state/snapshot: extract and split cap method, cover corners --- core/state/snapshot/difflayer.go | 121 ----------- core/state/snapshot/difflayer_test.go | 106 ---------- core/state/snapshot/disklayer.go | 6 - core/state/snapshot/snapshot.go | 181 +++++++++++++++- core/state/snapshot/snapshot_test.go | 286 ++++++++++++++++++++++++++ 5 files changed, 461 insertions(+), 239 deletions(-) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index c7a65e2a4..0f7a4223f 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -22,8 +22,6 @@ import ( "sync" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" ) @@ -169,125 +167,6 @@ func (dl *diffLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]by return newDiffLayer(dl, dl.number+1, blockRoot, accounts, storage) } -// Cap traverses downwards the diff tree until the number of allowed layers are -// crossed. All diffs beyond the permitted number are flattened downwards. If -// the layer limit is reached, memory cap is also enforced (but not before). The -// block numbers for the disk layer and first diff layer are returned for GC. -func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) { - // Dive until we run out of layers or reach the persistent database - if layers > 2 { - // If we still have diff layers below, recurse - if parent, ok := dl.parent.(*diffLayer); ok { - return parent.Cap(layers-1, memory) - } - // Diff stack too shallow, return block numbers without modifications - return dl.parent.(*diskLayer).number, dl.number - } - // We're out of layers, flatten anything below, stopping if it's the disk or if - // the memory limit is not yet exceeded. - switch parent := dl.parent.(type) { - case *diskLayer: - return parent.number, dl.number - case *diffLayer: - // Flatten the parent into the grandparent. The flattening internally obtains a - // write lock on grandparent. - flattened := parent.flatten().(*diffLayer) - - dl.lock.Lock() - defer dl.lock.Unlock() - - dl.parent = flattened - if flattened.memory < memory { - diskNumber, _ := flattened.parent.Info() - return diskNumber, flattened.number - } - default: - panic(fmt.Sprintf("unknown data layer: %T", parent)) - } - // If the bottommost layer is larger than our memory cap, persist to disk - var ( - parent = dl.parent.(*diffLayer) - base = parent.parent.(*diskLayer) - batch = base.db.NewBatch() - ) - parent.lock.RLock() - defer parent.lock.RUnlock() - - // Start by temporarily deleting the current snapshot block marker. This - // ensures that in the case of a crash, the entire snapshot is invalidated. 
- rawdb.DeleteSnapshotBlock(batch) - - // Mark the original base as stale as we're going to create a new wrapper - base.lock.Lock() - if base.stale { - panic("parent disk layer is stale") // we've committed into the same base from two children, boo - } - base.stale = true - base.lock.Unlock() - - // Push all the accounts into the database - for hash, data := range parent.accountData { - if len(data) > 0 { - // Account was updated, push to disk - rawdb.WriteAccountSnapshot(batch, hash, data) - base.cache.Set(string(hash[:]), data) - - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - log.Crit("Failed to write account snapshot", "err", err) - } - batch.Reset() - } - } else { - // Account was deleted, remove all storage slots too - rawdb.DeleteAccountSnapshot(batch, hash) - base.cache.Set(string(hash[:]), nil) - - it := rawdb.IterateStorageSnapshots(base.db, hash) - for it.Next() { - if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator - batch.Delete(key) - base.cache.Delete(string(key[1:])) - } - } - it.Release() - } - } - // Push all the storage slots into the database - for accountHash, storage := range parent.storageData { - for storageHash, data := range storage { - if len(data) > 0 { - rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) - base.cache.Set(string(append(accountHash[:], storageHash[:]...)), data) - } else { - rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) - base.cache.Set(string(append(accountHash[:], storageHash[:]...)), nil) - } - } - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - log.Crit("Failed to write storage snapshot", "err", err) - } - batch.Reset() - } - } - // Update the snapshot block marker and write any remainder data - newBase := &diskLayer{ - root: parent.root, - number: parent.number, - cache: base.cache, - db: base.db, - journal: base.journal, - } - rawdb.WriteSnapshotBlock(batch, newBase.number, newBase.root) - if err := batch.Write(); err != nil { - log.Crit("Failed to write leftover snapshot", "err", err) - } - dl.parent = newBase - - return newBase.number, dl.number -} - // flatten pushes all data from this point downwards, flattening everything into // a single diff at the bottom. Since usually the lowermost diff is the largest, // the flattening bulds up from there in reverse. 
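As an aside for readers following this refactor: the capping logic being removed above leans on flatten(), which folds a child diff layer into its parent with the child's writes taking precedence. Below is a minimal, self-contained Go sketch of that merge rule; the toyLayer type and every name in it are invented for illustration and are not part of this patch.

package main

import "fmt"

// toyLayer is an illustrative stand-in for a diff layer: it tracks only a
// parent pointer and the accounts modified at this layer.
type toyLayer struct {
	parent   *toyLayer
	accounts map[string][]byte
}

// flattenInto folds this layer's writes on top of its parent's, mirroring the
// rule diffLayer.flatten uses: the child's (newer) values shadow the parent's.
func (l *toyLayer) flattenInto() *toyLayer {
	if l.parent == nil {
		return l
	}
	merged := make(map[string][]byte)
	for k, v := range l.parent.accounts {
		merged[k] = v
	}
	for k, v := range l.accounts {
		merged[k] = v // newer write wins
	}
	return &toyLayer{parent: l.parent.parent, accounts: merged}
}

func main() {
	base := &toyLayer{accounts: map[string][]byte{"a1": {1}}}
	child := &toyLayer{parent: base, accounts: map[string][]byte{"a1": {2}, "a2": {3}}}
	flat := child.flattenInto()
	fmt.Println(len(flat.accounts), flat.accounts["a1"]) // 2 [2]
}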
diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 5a718c617..5499f2016 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -18,15 +18,11 @@ package snapshot import ( "bytes" - "fmt" "math/big" "math/rand" "testing" - "time" - "github.com/allegro/bigcache" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/rlp" ) @@ -192,113 +188,12 @@ func TestInsertAndMerge(t *testing.T) { } } -// TestCapTree tests some functionality regarding capping/flattening -func TestCapTree(t *testing.T) { - - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - setAccount := func(accKey string) map[common.Hash][]byte { - return map[common.Hash][]byte{ - common.HexToHash(accKey): randomAccount(), - } - } - // the bottom-most layer, aside from the 'disk layer' - cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup - Shards: 1, - LifeWindow: time.Hour, - MaxEntriesInWindow: 1 * 1024, - MaxEntrySize: 1, - HardMaxCacheSize: 1, - }) - - base := &diskLayer{ - journal: "", - db: rawdb.NewMemoryDatabase(), - cache: cache, - number: 0, - root: common.HexToHash("0x01"), - } - // The lowest difflayer - a1 := base.Update(common.HexToHash("0xa1"), setAccount("0xa1"), storage) - - a2 := a1.Update(common.HexToHash("0xa2"), setAccount("0xa2"), storage) - b2 := a1.Update(common.HexToHash("0xb2"), setAccount("0xb2"), storage) - - a3 := a2.Update(common.HexToHash("0xa3"), setAccount("0xa3"), storage) - b3 := b2.Update(common.HexToHash("0xb3"), setAccount("0xb3"), storage) - - checkExist := func(layer *diffLayer, key string) error { - accountKey := common.HexToHash(key) - data, _ := layer.Account(accountKey) - if data == nil { - return fmt.Errorf("expected %x to exist, got nil", accountKey) - } - return nil - } - shouldErr := func(layer *diffLayer, key string) error { - accountKey := common.HexToHash(key) - data, err := layer.Account(accountKey) - if err == nil { - return fmt.Errorf("expected error, got data %x", data) - } - return nil - } - - // check basics - if err := checkExist(b3, "0xa1"); err != nil { - t.Error(err) - } - if err := checkExist(b3, "0xb2"); err != nil { - t.Error(err) - } - if err := checkExist(b3, "0xb3"); err != nil { - t.Error(err) - } - // Now, merge the a-chain - diskNum, diffNum := a3.Cap(0, 1024) - if diskNum != 0 { - t.Errorf("disk layer err, got %d exp %d", diskNum, 0) - } - if diffNum != 2 { - t.Errorf("diff layer err, got %d exp %d", diffNum, 2) - } - // At this point, a2 got merged into a1. 
Thus, a1 is now modified, - // and as a1 is the parent of b2, b2 should no longer be able to iterate into parent - - // These should still be accessible - if err := checkExist(b3, "0xb2"); err != nil { - t.Error(err) - } - if err := checkExist(b3, "0xb3"); err != nil { - t.Error(err) - } - //b2ParentNum, _ := b2.parent.Info() - //if b2.parent.invalid == false - // t.Errorf("err, exp parent to be invalid, got %v", b2.parent, b2ParentNum) - //} - // But these would need iteration into the modified parent: - if err := shouldErr(b3, "0xa1"); err != nil { - t.Error(err) - } - if err := shouldErr(b3, "0xa2"); err != nil { - t.Error(err) - } - if err := shouldErr(b3, "0xa3"); err != nil { - t.Error(err) - } -} - type emptyLayer struct{} func (emptyLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { panic("implement me") } -func (emptyLayer) Cap(layers int, memory uint64) (uint64, uint64) { - panic("implement me") -} - func (emptyLayer) Journal() error { panic("implement me") } @@ -403,7 +298,6 @@ func BenchmarkSearchSlot(b *testing.B) { // Without sorting and tracking accountlist // BenchmarkFlatten-6 300 5511511 ns/op func BenchmarkFlatten(b *testing.B) { - fill := func(parent snapshot, blocknum int) *diffLayer { accounts := make(map[common.Hash][]byte) storage := make(map[common.Hash]map[common.Hash][]byte) diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index a9839f01a..50321f154 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -126,12 +126,6 @@ func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]by return newDiffLayer(dl, dl.number+1, blockHash, accounts, storage) } -// Cap traverses downwards the diff tree until the number of allowed layers are -// crossed. All diffs beyond the permitted number are flattened downwards. -func (dl *diskLayer) Cap(layers int, memory uint64) (uint64, uint64) { - return dl.number, dl.number -} - // Journal commits an entire diff hierarchy to disk into a single journal file. func (dl *diskLayer) Journal() error { // There's no journalling a disk layer diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 6a21d57dc..a18178977 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -73,11 +73,6 @@ type snapshot interface { // copying everything. Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer - // Cap traverses downwards the diff tree until the number of allowed layers are - // crossed. All diffs beyond the permitted number are flattened downwards. The - // block numbers for the disk layer and first diff layer are returned for GC. - Cap(layers int, memory uint64) (uint64, uint64) - // Journal commits an entire diff hierarchy to disk into a single journal file. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). 
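Before the new hunks below, it may help to summarize the three capping modes the reworked SnapshotTree.Cap distinguishes. The following sketch is purely illustrative: the capMode helper, its parameters and its return strings do not exist in the patch; only the case analysis mirrors the code that follows.

package main

import "fmt"

// capMode sketches the case analysis of the reworked Cap method; all names
// here are invented for illustration only.
func capMode(layers int, bottomMemory, memoryLimit uint64) string {
	switch layers {
	case 0:
		// Full commit: flatten every diff and merge the result onto disk.
		return "flatten all diffs, write to disk"
	case 1:
		// Full flattening: collapse the diffs into one in-memory accumulator,
		// persisting it only if it outgrew the memory allowance.
		if bottomMemory >= memoryLimit {
			return "flatten all diffs, write to disk"
		}
		return "flatten all diffs, keep accumulator in memory"
	default:
		// Otherwise keep `layers` diffs and cap whatever lies below them.
		return "recurse downwards and cap below the retained layers"
	}
}

func main() {
	fmt.Println(capMode(0, 0, 1024))
	fmt.Println(capMode(1, 2048, 1024))
	fmt.Println(capMode(16, 0, 4*1024*1024))
}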
@@ -169,11 +164,56 @@ func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) er if snap == nil { return fmt.Errorf("snapshot [%#x] missing", blockRoot) } + diff, ok := snap.(*diffLayer) + if !ok { + return fmt.Errorf("snapshot [%#x] is base layer", blockRoot) + } // Run the internal capping and discard all stale layers st.lock.Lock() defer st.lock.Unlock() - diskNumber, diffNumber := snap.Cap(layers, memory) + var ( + diskNumber uint64 + diffNumber uint64 + ) + // Flattening the bottom-most diff layer requires special casing since there's + // no child to rewire to the grandparent. In that case we can fake a temporary + // child for the capping and then remove it. + switch layers { + case 0: + // If full commit was requested, flatten the diffs and merge onto disk + diff.lock.RLock() + base := diffToDisk(diff.flatten().(*diffLayer)) + diff.lock.RUnlock() + + st.layers[base.root] = base + diskNumber, diffNumber = base.number, base.number + + case 1: + // If full flattening was requested, flatten the diffs but only merge if the + // memory limit was reached + var ( + bottom *diffLayer + base *diskLayer + ) + diff.lock.RLock() + bottom = diff.flatten().(*diffLayer) + if bottom.memory >= memory { + base = diffToDisk(bottom) + } + diff.lock.RUnlock() + + if base != nil { + st.layers[base.root] = base + diskNumber, diffNumber = base.number, base.number + } else { + st.layers[bottom.root] = bottom + diskNumber, diffNumber = bottom.parent.(*diskLayer).number, bottom.number + } + + default: + diskNumber, diffNumber = st.cap(diff, layers, memory) + } for root, snap := range st.layers { if number, _ := snap.Info(); number != diskNumber && number < diffNumber { delete(st.layers, root) @@ -182,6 +222,135 @@ func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) er return nil } +// cap traverses downwards the diff tree until the number of allowed layers are +// crossed. All diffs beyond the permitted number are flattened downwards. If +// the layer limit is reached, memory cap is also enforced (but not before). The +// block numbers for the disk layer and first diff layer are returned for GC. +func (st *SnapshotTree) cap(diff *diffLayer, layers int, memory uint64) (uint64, uint64) { + // Dive until we run out of layers or reach the persistent database + if layers > 2 { + // If we still have diff layers below, recurse + if parent, ok := diff.parent.(*diffLayer); ok { + return st.cap(parent, layers-1, memory) + } + // Diff stack too shallow, return block numbers without modifications + return diff.parent.(*diskLayer).number, diff.number + } + // We're out of layers, flatten anything below, stopping if it's the disk or if + // the memory limit is not yet exceeded. + switch parent := diff.parent.(type) { + case *diskLayer: + return parent.number, diff.number + + case *diffLayer: + // Flatten the parent into the grandparent. The flattening internally obtains a + // write lock on grandparent. 
+ flattened := parent.flatten().(*diffLayer) + st.layers[flattened.root] = flattened + + diff.lock.Lock() + defer diff.lock.Unlock() + + diff.parent = flattened + if flattened.memory < memory { + diskNumber, _ := flattened.parent.Info() + return diskNumber, flattened.number + } + default: + panic(fmt.Sprintf("unknown data layer: %T", parent)) + } + // If the bottom-most layer is larger than our memory cap, persist to disk + bottom := diff.parent.(*diffLayer) + + bottom.lock.RLock() + base := diffToDisk(bottom) + bottom.lock.RUnlock() + + st.layers[base.root] = base + diff.parent = base + + return base.number, diff.number +} + +// diffToDisk merges a bottom-most diff into the persistent disk layer underneath +// it. The method will panic if called onto a non-bottom-most diff layer. +func diffToDisk(bottom *diffLayer) *diskLayer { + var ( + base = bottom.parent.(*diskLayer) + batch = base.db.NewBatch() + ) + // Start by temporarily deleting the current snapshot block marker. This + // ensures that in the case of a crash, the entire snapshot is invalidated. + rawdb.DeleteSnapshotBlock(batch) + + // Mark the original base as stale as we're going to create a new wrapper + base.lock.Lock() + if base.stale { + panic("parent disk layer is stale") // we've committed into the same base from two children, boo + } + base.stale = true + base.lock.Unlock() + + // Push all the accounts into the database + for hash, data := range bottom.accountData { + if len(data) > 0 { + // Account was updated, push to disk + rawdb.WriteAccountSnapshot(batch, hash, data) + base.cache.Set(string(hash[:]), data) + + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write account snapshot", "err", err) + } + batch.Reset() + } + } else { + // Account was deleted, remove all storage slots too + rawdb.DeleteAccountSnapshot(batch, hash) + base.cache.Set(string(hash[:]), nil) + + it := rawdb.IterateStorageSnapshots(base.db, hash) + for it.Next() { + if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator + batch.Delete(key) + base.cache.Delete(string(key[1:])) + } + } + it.Release() + } + } + // Push all the storage slots into the database + for accountHash, storage := range bottom.storageData { + for storageHash, data := range storage { + if len(data) > 0 { + rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) + base.cache.Set(string(append(accountHash[:], storageHash[:]...)), data) + } else { + rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) + base.cache.Set(string(append(accountHash[:], storageHash[:]...)), nil) + } + } + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write storage snapshot", "err", err) + } + batch.Reset() + } + } + // Update the snapshot block marker and write any remainder data + rawdb.WriteSnapshotBlock(batch, bottom.number, bottom.root) + if err := batch.Write(); err != nil { + log.Crit("Failed to write leftover snapshot", "err", err) + } + return &diskLayer{ + root: bottom.root, + number: bottom.number, + cache: base.cache, + db: base.db, + journal: base.journal, + } +} + // Journal commits an entire diff hierarchy to disk into a single journal file. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). 
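The tests added below all assert the same invariant: once a layer has been flattened into (or replaced via diffToDisk), any externally held reference must answer with ErrSnapshotStale instead of serving stale data. Here is a minimal, self-contained sketch of that guarded-read pattern; the names (guardedLayer, errStale) are invented stand-ins for the real types, not part of the patch.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// errStale stands in for snapshot.ErrSnapshotStale in this illustration.
var errStale = errors.New("snapshot stale")

// guardedLayer shows the read pattern the tests below rely on: every accessor
// re-checks the stale flag under a read lock, so a reference that has been
// flattened into another layer errors out instead of returning junk data.
type guardedLayer struct {
	lock  sync.RWMutex
	stale bool
	data  map[string][]byte
}

func (l *guardedLayer) Account(key string) ([]byte, error) {
	l.lock.RLock()
	defer l.lock.RUnlock()
	if l.stale {
		return nil, errStale
	}
	return l.data[key], nil
}

// invalidate marks the layer stale, e.g. after its content was merged into a
// replacement disk layer.
func (l *guardedLayer) invalidate() {
	l.lock.Lock()
	l.stale = true
	l.lock.Unlock()
}

func main() {
	layer := &guardedLayer{data: map[string][]byte{"a1": {1}}}
	if _, err := layer.Account("a1"); err != nil {
		panic(err)
	}
	layer.invalidate()
	_, err := layer.Account("a1")
	fmt.Println(errors.Is(err, errStale)) // true
}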
diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 903bd4a6f..ecd39bf3e 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -15,3 +15,289 @@ // along with the go-ethereum library. If not, see . package snapshot + +import ( + "fmt" + "testing" + "time" + + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" +) + +// Tests that if a disk layer becomes stale, no active external references will +// be returned with junk data. This version of the test flattens every diff layer +// to check internal corner case around the bottom-most memory accumulator. +func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) + base := &diskLayer{ + db: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: cache, + } + snaps := &SnapshotTree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Retrieve a reference to the base and commit a diff on top + ref := snaps.Snapshot(base.root) + + accounts := map[common.Hash][]byte{ + common.HexToHash("0xa1"): randomAccount(), + } + storage := make(map[common.Hash]map[common.Hash][]byte) + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if n := len(snaps.layers); n != 2 { + t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 2) + } + // Commit the diff layer onto the disk and ensure it's persisted + if err := snaps.Cap(common.HexToHash("0x02"), 0, 0); err != nil { + t.Fatalf("failed to merge diff layer onto disk: %v", err) + } + // Since the base layer was modified, ensure that data retrievald on the external reference fail + if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { + t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) + } + if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale { + t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err) + } + if n := len(snaps.layers); n != 1 { + t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 1) + fmt.Println(snaps.layers) + } +} + +// Tests that if a disk layer becomes stale, no active external references will +// be returned with junk data. This version of the test retains the bottom diff +// layer to check the usual mode of operation where the accumulator is retained. 
+func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) + base := &diskLayer{ + db: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: cache, + } + snaps := &SnapshotTree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Retrieve a reference to the base and commit two diffs on top + ref := snaps.Snapshot(base.root) + + accounts := map[common.Hash][]byte{ + common.HexToHash("0xa1"): randomAccount(), + } + storage := make(map[common.Hash]map[common.Hash][]byte) + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if n := len(snaps.layers); n != 3 { + t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3) + } + // Commit the diff layer onto the disk and ensure it's persisted + if err := snaps.Cap(common.HexToHash("0x03"), 2, 0); err != nil { + t.Fatalf("failed to merge diff layer onto disk: %v", err) + } + // Since the base layer was modified, ensure that data retrievald on the external reference fail + if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { + t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) + } + if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale { + t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err) + } + if n := len(snaps.layers); n != 2 { + t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 2) + fmt.Println(snaps.layers) + } +} + +// Tests that if a diff layer becomes stale, no active external references will +// be returned with junk data. This version of the test flattens every diff layer +// to check internal corner case around the bottom-most memory accumulator. 
+func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) + base := &diskLayer{ + db: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: cache, + } + snaps := &SnapshotTree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Commit two diffs on top and retrieve a reference to the bottommost + accounts := map[common.Hash][]byte{ + common.HexToHash("0xa1"): randomAccount(), + } + storage := make(map[common.Hash]map[common.Hash][]byte) + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if n := len(snaps.layers); n != 3 { + t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3) + } + ref := snaps.Snapshot(common.HexToHash("0x02")) + + // Flatten the diff layer into the bottom accumulator + if err := snaps.Cap(common.HexToHash("0x03"), 1, 1024*1024); err != nil { + t.Fatalf("failed to flatten diff layer into accumulator: %v", err) + } + // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail + if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { + t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) + } + if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale { + t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err) + } + if n := len(snaps.layers); n != 2 { + t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 2) + fmt.Println(snaps.layers) + } +} + +// Tests that if a diff layer becomes stale, no active external references will +// be returned with junk data. This version of the test retains the bottom diff +// layer to check the usual mode of operation where the accumulator is retained. 
+func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) + base := &diskLayer{ + db: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: cache, + } + snaps := &SnapshotTree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Commit three diffs on top and retrieve a reference to the bottommost + accounts := map[common.Hash][]byte{ + common.HexToHash("0xa1"): randomAccount(), + } + storage := make(map[common.Hash]map[common.Hash][]byte) + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), accounts, storage); err != nil { + t.Fatalf("failed to create a diff layer: %v", err) + } + if n := len(snaps.layers); n != 4 { + t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 4) + } + ref := snaps.Snapshot(common.HexToHash("0x02")) + + // Flatten the diff layer into the bottom accumulator + if err := snaps.Cap(common.HexToHash("0x04"), 2, 1024*1024); err != nil { + t.Fatalf("failed to flatten diff layer into accumulator: %v", err) + } + // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail + if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { + t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) + } + if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale { + t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err) + } + if n := len(snaps.layers); n != 3 { + t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 3) + fmt.Println(snaps.layers) + } +} + +// TestPostCapBasicDataAccess tests some functionality regarding capping/flattening. 
+func TestPostCapBasicDataAccess(t *testing.T) { + // setAccount is a helper to construct a random account entry and assign it to + // an account slot in a snapshot + setAccount := func(accKey string) map[common.Hash][]byte { + return map[common.Hash][]byte{ + common.HexToHash(accKey): randomAccount(), + } + } + // Create a starting base layer and a snapshot tree out of it + cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) + base := &diskLayer{ + db: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: cache, + } + snaps := &SnapshotTree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // The lowest difflayer + snaps.Update(common.HexToHash("0xa1"), common.HexToHash("0x01"), setAccount("0xa1"), nil) + snaps.Update(common.HexToHash("0xa2"), common.HexToHash("0xa1"), setAccount("0xa2"), nil) + snaps.Update(common.HexToHash("0xb2"), common.HexToHash("0xa1"), setAccount("0xb2"), nil) + + snaps.Update(common.HexToHash("0xa3"), common.HexToHash("0xa2"), setAccount("0xa3"), nil) + snaps.Update(common.HexToHash("0xb3"), common.HexToHash("0xb2"), setAccount("0xb3"), nil) + + // checkExist verifies if an account exiss in a snapshot + checkExist := func(layer *diffLayer, key string) error { + if data, _ := layer.Account(common.HexToHash(key)); data == nil { + return fmt.Errorf("expected %x to exist, got nil", common.HexToHash(key)) + } + return nil + } + // shouldErr checks that an account access errors as expected + shouldErr := func(layer *diffLayer, key string) error { + if data, err := layer.Account(common.HexToHash(key)); err == nil { + return fmt.Errorf("expected error, got data %x", data) + } + return nil + } + // check basics + snap := snaps.Snapshot(common.HexToHash("0xb3")).(*diffLayer) + + if err := checkExist(snap, "0xa1"); err != nil { + t.Error(err) + } + if err := checkExist(snap, "0xb2"); err != nil { + t.Error(err) + } + if err := checkExist(snap, "0xb3"); err != nil { + t.Error(err) + } + // Now, merge the a-chain + snaps.Cap(common.HexToHash("0xa3"), 0, 1024) + + // At this point, a2 got merged into a1. Thus, a1 is now modified, and as a1 is + // the parent of b2, b2 should no longer be able to iterate into parent. 
+ + // These should still be accessible + if err := checkExist(snap, "0xb2"); err != nil { + t.Error(err) + } + if err := checkExist(snap, "0xb3"); err != nil { + t.Error(err) + } + // But these would need iteration into the modified parent + if err := shouldErr(snap, "0xa1"); err != nil { + t.Error(err) + } + if err := shouldErr(snap, "0xa2"); err != nil { + t.Error(err) + } + if err := shouldErr(snap, "0xa3"); err != nil { + t.Error(err) + } +} From cdf3f016dfc19b0f5a6471f2b649519b7256d68d Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 23 Oct 2019 15:19:02 +0200 Subject: [PATCH 04/28] snapshot: iteration and buffering optimizations --- core/state/snapshot/difflayer.go | 3 -- core/state/snapshot/difflayer_journal.go | 22 +++++++++--- core/state/snapshot/difflayer_test.go | 45 ++++++++++++++++++++++++ core/state/snapshot/snapshot.go | 25 +++++++------ core/state/snapshot/snapshot_test.go | 18 ++++++++++ 5 files changed, 96 insertions(+), 17 deletions(-) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 0f7a4223f..644c8fb4b 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -227,9 +227,6 @@ func (dl *diffLayer) flatten() snapshot { // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). func (dl *diffLayer) Journal() error { - dl.lock.RLock() - defer dl.lock.RUnlock() - writer, err := dl.journal() if err != nil { return err diff --git a/core/state/snapshot/difflayer_journal.go b/core/state/snapshot/difflayer_journal.go index 844ee8859..16fdb8a97 100644 --- a/core/state/snapshot/difflayer_journal.go +++ b/core/state/snapshot/difflayer_journal.go @@ -17,6 +17,7 @@ package snapshot import ( + "bufio" "fmt" "io" "os" @@ -105,12 +106,22 @@ func (dl *diffLayer) journal() (io.WriteCloser, error) { } writer = file } + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + writer.Close() + return nil, ErrSnapshotStale + } + buf := bufio.NewWriter(writer) // Everything below was journalled, persist this layer too - if err := rlp.Encode(writer, dl.number); err != nil { + if err := rlp.Encode(buf, dl.number); err != nil { + buf.Flush() writer.Close() return nil, err } - if err := rlp.Encode(writer, dl.root); err != nil { + if err := rlp.Encode(buf, dl.root); err != nil { + buf.Flush() writer.Close() return nil, err } @@ -118,7 +129,8 @@ func (dl *diffLayer) journal() (io.WriteCloser, error) { for hash, blob := range dl.accountData { accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) } - if err := rlp.Encode(writer, accounts); err != nil { + if err := rlp.Encode(buf, accounts); err != nil { + buf.Flush() writer.Close() return nil, err } @@ -132,9 +144,11 @@ func (dl *diffLayer) journal() (io.WriteCloser, error) { } storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) } - if err := rlp.Encode(writer, storage); err != nil { + if err := rlp.Encode(buf, storage); err != nil { + buf.Flush() writer.Close() return nil, err } + buf.Flush() return writer, nil } diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 5499f2016..5b7907301 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -20,6 +20,8 @@ import ( "bytes" "math/big" "math/rand" + "os" + "path" "testing" "github.com/ethereum/go-ethereum/common" @@ -340,3 +342,46 @@ func BenchmarkFlatten(b *testing.B) { b.StopTimer() } } + +// This test writes ~324M of diff 
layers to disk, spread over +// - 128 individual layers, +// - each with 200 accounts +// - containing 200 slots +// +// BenchmarkJournal-6 1 1471373923 ns/ops +// BenchmarkJournal-6 1 1208083335 ns/op // bufio writer +func BenchmarkJournal(b *testing.B) { + fill := func(parent snapshot, blocknum int) *diffLayer { + accounts := make(map[common.Hash][]byte) + storage := make(map[common.Hash]map[common.Hash][]byte) + + for i := 0; i < 200; i++ { + accountKey := randomHash() + accounts[accountKey] = randomAccount() + + accStorage := make(map[common.Hash][]byte) + for i := 0; i < 200; i++ { + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + + } + storage[accountKey] = accStorage + } + return newDiffLayer(parent, uint64(blocknum), common.Hash{}, accounts, storage) + } + + var layer snapshot + layer = &diskLayer{ + journal: path.Join(os.TempDir(), "difflayer_journal.tmp"), + } + for i := 1; i < 128; i++ { + layer = fill(layer, i) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + f, _ := layer.(*diffLayer).journal() + f.Close() + } +} diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index a18178977..668522fec 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -160,13 +160,15 @@ func (st *SnapshotTree) Update(blockRoot common.Hash, parentRoot common.Hash, ac // are flattened downwards. func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) error { // Retrieve the head snapshot to cap from - snap := st.Snapshot(blockRoot).(snapshot) - if snap == nil { + var snap snapshot + if s := st.Snapshot(blockRoot); s == nil { return fmt.Errorf("snapshot [%#x] missing", blockRoot) + } else { + snap = s.(snapshot) } diff, ok := snap.(*diffLayer) if !ok { - return fmt.Errorf("snapshot [%#x] is base layer", blockRoot) + return fmt.Errorf("snapshot [%#x] is disk layer", blockRoot) } // Run the internal capping and discard all stale layers st.lock.Lock() @@ -228,13 +230,14 @@ func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) er // block numbers for the disk layer and first diff layer are returned for GC. func (st *SnapshotTree) cap(diff *diffLayer, layers int, memory uint64) (uint64, uint64) { // Dive until we run out of layers or reach the persistent database - if layers > 2 { - // If we still have diff layers below, recurse + for ; layers > 2; layers-- { + // If we still have diff layers below, continue down if parent, ok := diff.parent.(*diffLayer); ok { - return st.cap(parent, layers-1, memory) + diff = parent + } else { + // Diff stack too shallow, return block numbers without modifications + return diff.parent.(*diskLayer).number, diff.number } - // Diff stack too shallow, return block numbers without modifications - return diff.parent.(*diskLayer).number, diff.number } // We're out of layers, flatten anything below, stopping if it's the disk or if // the memory limit is not yet exceeded. @@ -356,9 +359,11 @@ func diffToDisk(bottom *diffLayer) *diskLayer { // flattening everything down (bad for reorgs). 
func (st *SnapshotTree) Journal(blockRoot common.Hash) error { // Retrieve the head snapshot to journal from - snap := st.Snapshot(blockRoot).(snapshot) - if snap == nil { + var snap snapshot + if s := st.Snapshot(blockRoot); s == nil { return fmt.Errorf("snapshot [%#x] missing", blockRoot) + } else { + snap = s.(snapshot) } // Run the journaling st.lock.Lock() diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index ecd39bf3e..1551a71a2 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -205,6 +205,15 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { } ref := snaps.Snapshot(common.HexToHash("0x02")) + // Doing a Cap operation with many allowed layers should be a no-op + exp := len(snaps.layers) + if err := snaps.Cap(common.HexToHash("0x04"), 2000, 1024*1024); err != nil { + t.Fatalf("failed to flatten diff layer into accumulator: %v", err) + } + if got := len(snaps.layers); got != exp { + t.Errorf("layers modified, got %d exp %d", got, exp) + } + // Flatten the diff layer into the bottom accumulator if err := snaps.Cap(common.HexToHash("0x04"), 2, 1024*1024); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) @@ -277,6 +286,10 @@ func TestPostCapBasicDataAccess(t *testing.T) { if err := checkExist(snap, "0xb3"); err != nil { t.Error(err) } + // Cap to a bad root should fail + if err := snaps.Cap(common.HexToHash("0x1337"), 0, 1024); err == nil { + t.Errorf("expected error, got none") + } // Now, merge the a-chain snaps.Cap(common.HexToHash("0xa3"), 0, 1024) @@ -300,4 +313,9 @@ func TestPostCapBasicDataAccess(t *testing.T) { if err := shouldErr(snap, "0xa3"); err != nil { t.Error(err) } + // Now, merge it again, just for fun. 
It should now error, since a3 + // is a disk layer + if err := snaps.Cap(common.HexToHash("0xa3"), 0, 1024); err == nil { + t.Error("expected error capping the disk layer, got none") + } } From d754091a87703278b744815ccdcc499df66c9c1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 22 Nov 2019 13:23:49 +0200 Subject: [PATCH 05/28] core/state/snapshot: unlink snapshots from blocks, quad->linear cleanup --- core/blockchain.go | 10 +- core/rawdb/accessors_snapshot.go | 40 +++--- core/rawdb/schema.go | 4 +- core/state/snapshot/difflayer.go | 27 ++-- core/state/snapshot/difflayer_journal.go | 25 +--- core/state/snapshot/difflayer_test.go | 56 +++----- core/state/snapshot/disklayer.go | 22 ++- core/state/snapshot/generate.go | 11 +- core/state/snapshot/generate_test.go | 10 +- core/state/snapshot/snapshot.go | 175 +++++++++++++---------- core/state/snapshot/snapshot_test.go | 11 +- core/state/statedb.go | 18 +-- 12 files changed, 203 insertions(+), 206 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 676a72c77..6fb722d2d 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -140,10 +140,10 @@ type BlockChain struct { chainConfig *params.ChainConfig // Chain & network configuration cacheConfig *CacheConfig // Cache configuration for pruning - db ethdb.Database // Low level persistent database to store final content in - snaps *snapshot.SnapshotTree // Snapshot tree for fast trie leaf access - triegc *prque.Prque // Priority queue mapping block numbers to tries to gc - gcproc time.Duration // Accumulates canonical block processing for trie dumping + db ethdb.Database // Low level persistent database to store final content in + snaps *snapshot.Tree // Snapshot tree for fast trie leaf access + triegc *prque.Prque // Priority queue mapping block numbers to tries to gc + gcproc time.Duration // Accumulates canonical block processing for trie dumping hc *HeaderChain rmLogsFeed event.Feed @@ -301,7 +301,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } // Load any existing snapshot, regenerating it if loading failed head := bc.CurrentBlock() - if bc.snaps, err = snapshot.New(bc.db, "snapshot.rlp", head.NumberU64(), head.Root()); err != nil { + if bc.snaps, err = snapshot.New(bc.db, "snapshot.rlp", head.Root()); err != nil { return nil, err } // Take ownership of this particular state diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index 9989e6b50..9388e857b 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -17,38 +17,36 @@ package rawdb import ( - "encoding/binary" - "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" ) -// ReadSnapshotBlock retrieves the number and root of the block whose state is -// contained in the persisted snapshot. -func ReadSnapshotBlock(db ethdb.KeyValueReader) (uint64, common.Hash) { - data, _ := db.Get(snapshotBlockKey) - if len(data) != 8+common.HashLength { - return 0, common.Hash{} +// ReadSnapshotRoot retrieves the root of the block whose state is contained in +// the persisted snapshot. 
+func ReadSnapshotRoot(db ethdb.KeyValueReader) common.Hash { + data, _ := db.Get(snapshotRootKey) + if len(data) != common.HashLength { + return common.Hash{} } - return binary.BigEndian.Uint64(data[:8]), common.BytesToHash(data[8:]) + return common.BytesToHash(data) } -// WriteSnapshotBlock stores the number and root of the block whose state is -// contained in the persisted snapshot. -func WriteSnapshotBlock(db ethdb.KeyValueWriter, number uint64, root common.Hash) { - if err := db.Put(snapshotBlockKey, append(encodeBlockNumber(number), root.Bytes()...)); err != nil { - log.Crit("Failed to store snapsnot block's number and root", "err", err) +// WriteSnapshotRoot stores the root of the block whose state is contained in +// the persisted snapshot. +func WriteSnapshotRoot(db ethdb.KeyValueWriter, root common.Hash) { + if err := db.Put(snapshotRootKey, root[:]); err != nil { + log.Crit("Failed to store snapshot root", "err", err) } } -// DeleteSnapshotBlock deletes the number and hash of the block whose state is -// contained in the persisted snapshot. Since snapshots are not immutable, this -// method can be used during updates, so a crash or failure will mark the entire -// snapshot invalid. -func DeleteSnapshotBlock(db ethdb.KeyValueWriter) { - if err := db.Delete(snapshotBlockKey); err != nil { - log.Crit("Failed to remove snapsnot block's number and hash", "err", err) +// DeleteSnapshotRoot deletes the hash of the block whose state is contained in +// the persisted snapshot. Since snapshots are not immutable, this method can +// be used during updates, so a crash or failure will mark the entire snapshot +// invalid. +func DeleteSnapshotRoot(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotRootKey); err != nil { + log.Crit("Failed to remove snapshot root", "err", err) } } diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 8e611246a..d20658792 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -41,8 +41,8 @@ var ( // fastTrieProgressKey tracks the number of trie entries imported during fast sync. fastTrieProgressKey = []byte("TrieSync") - // snapshotBlockKey tracks the number and hash of the last snapshot. - snapshotBlockKey = []byte("SnapshotBlock") + // snapshotRootKey tracks the number and hash of the last snapshot. + snapshotRootKey = []byte("SnapshotRoot") // Data item prefixes (use single byte to avoid mixing data types, avoid `i`, used for indexes). headerPrefix = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 644c8fb4b..7e8487ea8 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -36,9 +36,8 @@ type diffLayer struct { parent snapshot // Parent snapshot modified by this one, never nil memory uint64 // Approximate guess as to how much memory we use - number uint64 // Block number to which this snapshot diff belongs to - root common.Hash // Root hash to which this snapshot diff belongs to - stale bool // Signals that the layer became stale (state progressed) + root common.Hash // Root hash to which this snapshot diff belongs to + stale bool // Signals that the layer became stale (state progressed) accountList []common.Hash // List of account for iteration. 
If it exists, it's sorted, otherwise it's nil accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) @@ -50,11 +49,10 @@ type diffLayer struct { // newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low // level persistent database or a hierarchical diff already. -func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { +func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { // Create the new layer with some pre-allocated data segments dl := &diffLayer{ parent: parent, - number: number, root: root, accountData: accounts, storageData: storage, @@ -63,7 +61,6 @@ func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map for _, data := range accounts { dl.memory += uint64(len(data)) } - // Fill the storage hashes and sort them for the iterator dl.storageList = make(map[common.Hash][]common.Hash) @@ -93,9 +90,18 @@ func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map return dl } -// Info returns the block number and root hash for which this snapshot was made. -func (dl *diffLayer) Info() (uint64, common.Hash) { - return dl.number, dl.root +// Root returns the root hash for which this snapshot was made. +func (dl *diffLayer) Root() common.Hash { + return dl.root +} + +// Stale return whether this layer has become stale (was flattened across) or if +// it's still live. +func (dl *diffLayer) Stale() bool { + dl.lock.RLock() + defer dl.lock.RUnlock() + + return dl.stale } // Account directly retrieves the account associated with a particular hash in @@ -164,7 +170,7 @@ func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro // Update creates a new layer on top of the existing snapshot diff tree with // the specified data items. func (dl *diffLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { - return newDiffLayer(dl, dl.number+1, blockRoot, accounts, storage) + return newDiffLayer(dl, blockRoot, accounts, storage) } // flatten pushes all data from this point downwards, flattening everything into @@ -213,7 +219,6 @@ func (dl *diffLayer) flatten() snapshot { // Return the combo parent return &diffLayer{ parent: parent.parent, - number: dl.number, root: dl.root, storageList: parent.storageList, storageData: parent.storageData, diff --git a/core/state/snapshot/difflayer_journal.go b/core/state/snapshot/difflayer_journal.go index 16fdb8a97..5490531be 100644 --- a/core/state/snapshot/difflayer_journal.go +++ b/core/state/snapshot/difflayer_journal.go @@ -43,18 +43,12 @@ type journalStorage struct { // diff and verifying that it can be linked to the requested parent. 
func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { // Read the next diff journal entry - var ( - number uint64 - root common.Hash - ) - if err := r.Decode(&number); err != nil { + var root common.Hash + if err := r.Decode(&root); err != nil { // The first read may fail with EOF, marking the end of the journal if err == io.EOF { return parent, nil } - return nil, fmt.Errorf("load diff number: %v", err) - } - if err := r.Decode(&root); err != nil { return nil, fmt.Errorf("load diff root: %v", err) } var accounts []journalAccount @@ -77,13 +71,7 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { } storageData[entry.Hash] = slots } - // Validate the block number to avoid state corruption - if parent, ok := parent.(*diffLayer); ok { - if number != parent.number+1 { - return nil, fmt.Errorf("snapshot chain broken: block #%d after #%d", number, parent.number) - } - } - return loadDiffLayer(newDiffLayer(parent, number, root, accountData, storageData), r) + return loadDiffLayer(newDiffLayer(parent, root, accountData, storageData), r) } // journal is the internal version of Journal that also returns the journal file @@ -113,13 +101,8 @@ func (dl *diffLayer) journal() (io.WriteCloser, error) { writer.Close() return nil, ErrSnapshotStale } - buf := bufio.NewWriter(writer) // Everything below was journalled, persist this layer too - if err := rlp.Encode(buf, dl.number); err != nil { - buf.Flush() - writer.Close() - return nil, err - } + buf := bufio.NewWriter(writer) if err := rlp.Encode(buf, dl.root); err != nil { buf.Flush() writer.Close() diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 5b7907301..7cd1e8062 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -61,11 +61,11 @@ func TestMergeBasics(t *testing.T) { } } // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer{}, 1, common.Hash{}, accounts, storage) - child := newDiffLayer(parent, 1, common.Hash{}, accounts, storage) - child = newDiffLayer(child, 1, common.Hash{}, accounts, storage) - child = newDiffLayer(child, 1, common.Hash{}, accounts, storage) - child = newDiffLayer(child, 1, common.Hash{}, accounts, storage) + parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) + child := newDiffLayer(parent, common.Hash{}, accounts, storage) + child = newDiffLayer(child, common.Hash{}, accounts, storage) + child = newDiffLayer(child, common.Hash{}, accounts, storage) + child = newDiffLayer(child, common.Hash{}, accounts, storage) // And flatten merged := (child.flatten()).(*diffLayer) @@ -122,7 +122,7 @@ func TestMergeDelete(t *testing.T) { } // Add some flip-flopping layers on top - parent := newDiffLayer(emptyLayer{}, 1, common.Hash{}, flip(), storage) + parent := newDiffLayer(emptyLayer{}, common.Hash{}, flip(), storage) child := parent.Update(common.Hash{}, flop(), storage) child = child.Update(common.Hash{}, flip(), storage) child = child.Update(common.Hash{}, flop(), storage) @@ -139,10 +139,6 @@ func TestMergeDelete(t *testing.T) { // And flatten merged := (child.flatten()).(*diffLayer) - // check number - if got, exp := merged.number, child.number; got != exp { - t.Errorf("merged layer: wrong number - exp %d got %d", exp, got) - } if data, _ := merged.Account(h1); data == nil { t.Errorf("merged layer: expected %x to be non-nil", h1) } @@ -169,7 +165,7 @@ func TestInsertAndMerge(t *testing.T) { { var accounts = make(map[common.Hash][]byte) var storage = 
make(map[common.Hash]map[common.Hash][]byte) - parent = newDiffLayer(emptyLayer{}, 1, common.Hash{}, accounts, storage) + parent = newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) } { var accounts = make(map[common.Hash][]byte) @@ -178,7 +174,7 @@ func TestInsertAndMerge(t *testing.T) { accstorage := make(map[common.Hash][]byte) storage[acc] = accstorage storage[acc][slot] = []byte{0x01} - child = newDiffLayer(parent, 2, common.Hash{}, accounts, storage) + child = newDiffLayer(parent, common.Hash{}, accounts, storage) } // And flatten merged := (child.flatten()).(*diffLayer) @@ -200,11 +196,12 @@ func (emptyLayer) Journal() error { panic("implement me") } -func (emptyLayer) Info() (uint64, common.Hash) { - return 0, common.Hash{} +func (emptyLayer) Stale() bool { + panic("implement me") } -func (emptyLayer) Number() uint64 { - return 0 + +func (emptyLayer) Root() common.Hash { + return common.Hash{} } func (emptyLayer) Account(hash common.Hash) (*Account, error) { @@ -227,8 +224,6 @@ func (emptyLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) // BenchmarkSearch-6 500000 3723 ns/op (10k per layer, only top-level RLock() func BenchmarkSearch(b *testing.B) { // First, we set up 128 diff layers, with 1K items each - - blocknum := uint64(0) fill := func(parent snapshot) *diffLayer { accounts := make(map[common.Hash][]byte) storage := make(map[common.Hash]map[common.Hash][]byte) @@ -236,10 +231,8 @@ func BenchmarkSearch(b *testing.B) { for i := 0; i < 10000; i++ { accounts[randomHash()] = randomAccount() } - blocknum++ - return newDiffLayer(parent, blocknum, common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, accounts, storage) } - var layer snapshot layer = emptyLayer{} for i := 0; i < 128; i++ { @@ -261,8 +254,6 @@ func BenchmarkSearch(b *testing.B) { // BenchmarkSearchSlot-6 100000 14551 ns/op (when checking parent number using atomic) func BenchmarkSearchSlot(b *testing.B) { // First, we set up 128 diff layers, with 1K items each - - blocknum := uint64(0) accountKey := common.Hash{} storageKey := common.HexToHash("0x1337") accountRLP := randomAccount() @@ -278,16 +269,13 @@ func BenchmarkSearchSlot(b *testing.B) { accStorage[randomHash()] = value storage[accountKey] = accStorage } - blocknum++ - return newDiffLayer(parent, blocknum, common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, accounts, storage) } - var layer snapshot layer = emptyLayer{} for i := 0; i < 128; i++ { layer = fill(layer) } - b.ResetTimer() for i := 0; i < b.N; i++ { layer.Storage(accountKey, storageKey) @@ -300,7 +288,7 @@ func BenchmarkSearchSlot(b *testing.B) { // Without sorting and tracking accountlist // BenchmarkFlatten-6 300 5511511 ns/op func BenchmarkFlatten(b *testing.B) { - fill := func(parent snapshot, blocknum int) *diffLayer { + fill := func(parent snapshot) *diffLayer { accounts := make(map[common.Hash][]byte) storage := make(map[common.Hash]map[common.Hash][]byte) @@ -317,7 +305,7 @@ func BenchmarkFlatten(b *testing.B) { } storage[accountKey] = accStorage } - return newDiffLayer(parent, uint64(blocknum), common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, accounts, storage) } b.ResetTimer() @@ -327,7 +315,7 @@ func BenchmarkFlatten(b *testing.B) { var layer snapshot layer = emptyLayer{} for i := 1; i < 128; i++ { - layer = fill(layer, i) + layer = fill(layer) } b.StartTimer() @@ -336,7 +324,6 @@ func BenchmarkFlatten(b *testing.B) { if !ok { break } - layer = dl.flatten() } b.StopTimer() @@ 
-351,7 +338,7 @@ func BenchmarkFlatten(b *testing.B) { // BenchmarkJournal-6 1 1471373923 ns/ops // BenchmarkJournal-6 1 1208083335 ns/op // bufio writer func BenchmarkJournal(b *testing.B) { - fill := func(parent snapshot, blocknum int) *diffLayer { + fill := func(parent snapshot) *diffLayer { accounts := make(map[common.Hash][]byte) storage := make(map[common.Hash]map[common.Hash][]byte) @@ -368,15 +355,14 @@ func BenchmarkJournal(b *testing.B) { } storage[accountKey] = accStorage } - return newDiffLayer(parent, uint64(blocknum), common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, accounts, storage) } - var layer snapshot layer = &diskLayer{ journal: path.Join(os.TempDir(), "difflayer_journal.tmp"), } for i := 1; i < 128; i++ { - layer = fill(layer, i) + layer = fill(layer) } b.ResetTimer() diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 50321f154..f302652eb 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -32,16 +32,24 @@ type diskLayer struct { db ethdb.KeyValueStore // Key-value store containing the base snapshot cache *bigcache.BigCache // Cache to avoid hitting the disk for direct access - number uint64 // Block number of the base snapshot - root common.Hash // Root hash of the base snapshot - stale bool // Signals that the layer became stale (state progressed) + root common.Hash // Root hash of the base snapshot + stale bool // Signals that the layer became stale (state progressed) lock sync.RWMutex } -// Info returns the block number and root hash for which this snapshot was made. -func (dl *diskLayer) Info() (uint64, common.Hash) { - return dl.number, dl.root +// Root returns root hash for which this snapshot was made. +func (dl *diskLayer) Root() common.Hash { + return dl.root +} + +// Stale return whether this layer has become stale (was flattened across) or if +// it's still live. +func (dl *diskLayer) Stale() bool { + dl.lock.RLock() + defer dl.lock.RUnlock() + + return dl.stale } // Account directly retrieves the account associated with a particular hash in @@ -123,7 +131,7 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro // the specified data items. Note, the maps are retained by the method to avoid // copying everything. func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { - return newDiffLayer(dl, dl.number+1, blockHash, accounts, storage) + return newDiffLayer(dl, blockHash, accounts, storage) } // Journal commits an entire diff hierarchy to disk into a single journal file. diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 4a66e0626..a2197557e 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -85,7 +85,7 @@ func wipeSnapshot(db ethdb.KeyValueStore) error { } it.Release() - rawdb.DeleteSnapshotBlock(batch) + rawdb.DeleteSnapshotRoot(batch) if err := batch.Write(); err != nil { return err } @@ -107,7 +107,7 @@ func wipeSnapshot(db ethdb.KeyValueStore) error { } // generateSnapshot regenerates a brand new snapshot based on an existing state database and head block. 
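// Illustrative sketch (not part of the patch): why the snapshot root marker is
// deleted before a wipe/regeneration and only rewritten after all entries are in
// place. The toy "db" is a plain map and the key names are invented; in the real
// code these correspond to rawdb.DeleteSnapshotRoot/WriteSnapshotRoot and the
// prefixed account/storage snapshot keys.
package main

import (
	"fmt"
	"strings"
)

const rootMarkerKey = "SnapshotRoot" // stand-in for the real marker key

// wipe removes the root marker first, then every key under the snapshot prefix.
// If the process dies half way through, the missing marker flags the snapshot
// as unusable and a later startup regenerates it from the state trie.
func wipe(db map[string][]byte, prefix string) {
	delete(db, rootMarkerKey)
	for k := range db {
		if strings.HasPrefix(k, prefix) {
			delete(db, k)
		}
	}
}

// regenerate writes all flat entries and only then the root marker, so the
// marker's presence implies a complete snapshot.
func regenerate(db map[string][]byte, prefix string, entries map[string][]byte, root string) {
	for k, v := range entries {
		db[prefix+k] = v
	}
	db[rootMarkerKey] = []byte(root)
}

func main() {
	db := map[string][]byte{"snap-old": {0x01}, rootMarkerKey: []byte("oldroot")}
	wipe(db, "snap-")
	regenerate(db, "snap-", map[string][]byte{"acc1": {0xaa}}, "newroot")
	fmt.Printf("marker=%s entries=%d\n", db[rootMarkerKey], len(db)-1)
}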
-func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (snapshot, error) { +func generateSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) (snapshot, error) { // Wipe any previously existing snapshot from the database if err := wipeSnapshot(db); err != nil { return nil, err @@ -124,7 +124,7 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, batch := db.NewBatch() triedb := trie.NewDatabase(db) - accTrie, err := trie.NewSecure(headRoot, triedb) + accTrie, err := trie.NewSecure(root, triedb) if err != nil { return nil, err } @@ -186,7 +186,7 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, fmt.Printf("Totals: %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) // Update the snapshot block marker and write any remainder data - rawdb.WriteSnapshotBlock(batch, headNumber, headRoot) + rawdb.WriteSnapshotRoot(batch, root) batch.Write() batch.Reset() @@ -207,7 +207,6 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, journal: journal, db: db, cache: cache, - number: headNumber, - root: headRoot, + root: root, }, nil } diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 1206445c5..180db920a 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -47,7 +47,7 @@ func TestWipe(t *testing.T) { rawdb.WriteStorageSnapshot(db, account, randomHash(), randomHash().Bytes()) } } - rawdb.WriteSnapshotBlock(db, 123, randomHash()) + rawdb.WriteSnapshotRoot(db, randomHash()) // Add some random non-snapshot data too to make wiping harder for i := 0; i < 65536; i++ { @@ -76,8 +76,8 @@ func TestWipe(t *testing.T) { if items != 128+128*1024 { t.Fatalf("snapshot size mismatch: have %d, want %d", items, 128+128*1024) } - if number, hash := rawdb.ReadSnapshotBlock(db); number != 123 || hash == (common.Hash{}) { - t.Errorf("snapshot block marker mismatch: have #%d [%#x], want #%d []", number, hash, 123) + if hash := rawdb.ReadSnapshotRoot(db); hash == (common.Hash{}) { + t.Errorf("snapshot block marker mismatch: have %#x, want ", hash) } // Wipe all snapshot entries from the database if err := wipeSnapshot(db); err != nil { @@ -93,8 +93,8 @@ func TestWipe(t *testing.T) { t.Errorf("snapshot entry remained after wipe: %x", key) } } - if number, hash := rawdb.ReadSnapshotBlock(db); number != 0 || hash != (common.Hash{}) { - t.Errorf("snapshot block marker remained after wipe: #%d [%#x]", number, hash) + if hash := rawdb.ReadSnapshotRoot(db); hash != (common.Hash{}) { + t.Errorf("snapshot block marker remained after wipe: %#x", hash) } // Iterate over the database and ensure miscellaneous items are present items = 0 diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 668522fec..edca5781d 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -43,12 +43,16 @@ var ( // layer had been invalidated due to the chain progressing forward far enough // to not maintain the layer's original state. ErrSnapshotStale = errors.New("snapshot stale") + + // errSnapshotCycle is returned if a snapshot is attempted to be inserted + // that forms a cycle in the snapshot tree. + errSnapshotCycle = errors.New("snapshot cycle") ) // Snapshot represents the functionality supported by a snapshot storage layer. 
type Snapshot interface { - // Info returns the block number and root hash for which this snapshot was made. - Info() (uint64, common.Hash) + // Root returns the root hash for which this snapshot was made. + Root() common.Hash // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. @@ -77,6 +81,10 @@ type snapshot interface { // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). Journal() error + + // Stale return whether this layer has become stale (was flattened across) or + // if it's still live. + Stale() bool } // SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent @@ -88,7 +96,7 @@ type snapshot interface { // The goal of a state snapshot is twofold: to allow direct access to account and // storage data to avoid expensive multi-level trie lookups; and to allow sorted, // cheap iteration of the account/storage tries for sync aid. -type SnapshotTree struct { +type Tree struct { layers map[common.Hash]snapshot // Collection of all known layers // TODO(karalabe): split Clique overlaps lock sync.RWMutex } @@ -99,22 +107,21 @@ type SnapshotTree struct { // // If the snapshot is missing or inconsistent, the entirety is deleted and will // be reconstructed from scratch based on the tries in the key-value store. -func New(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (*SnapshotTree, error) { +func New(db ethdb.KeyValueStore, journal string, root common.Hash) (*Tree, error) { // Attempt to load a previously persisted snapshot - head, err := loadSnapshot(db, journal, headNumber, headRoot) + head, err := loadSnapshot(db, journal, root) if err != nil { log.Warn("Failed to load snapshot, regenerating", "err", err) - if head, err = generateSnapshot(db, journal, headNumber, headRoot); err != nil { + if head, err = generateSnapshot(db, journal, root); err != nil { return nil, err } } // Existing snapshot loaded or one regenerated, seed all the layers - snap := &SnapshotTree{ + snap := &Tree{ layers: make(map[common.Hash]snapshot), } for head != nil { - _, root := head.Info() - snap.layers[root] = head + snap.layers[head.Root()] = head switch self := head.(type) { case *diffLayer: @@ -130,54 +137,57 @@ func New(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot com // Snapshot retrieves a snapshot belonging to the given block root, or nil if no // snapshot is maintained for that block. -func (st *SnapshotTree) Snapshot(blockRoot common.Hash) Snapshot { - st.lock.RLock() - defer st.lock.RUnlock() +func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot { + t.lock.RLock() + defer t.lock.RUnlock() - return st.layers[blockRoot] + return t.layers[blockRoot] } // Update adds a new snapshot into the tree, if that can be linked to an existing // old parent. It is disallowed to insert a disk layer (the origin of all). -func (st *SnapshotTree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { +func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { + // Reject noop updates to avoid self-loops in the snapshot tree. This is a + // special case that can only happen for Clique networks where empty blocks + // don't modify the state (0 block subsidy). 
+ // + // Although we could silently ignore this internally, it should be the caller's + // responsibility to avoid even attempting to insert such a snapshot. + if blockRoot == parentRoot { + return errSnapshotCycle + } // Generate a new snapshot on top of the parent - parent := st.Snapshot(parentRoot).(snapshot) + parent := t.Snapshot(parentRoot).(snapshot) if parent == nil { return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) } snap := parent.Update(blockRoot, accounts, storage) // Save the new snapshot for later - st.lock.Lock() - defer st.lock.Unlock() + t.lock.Lock() + defer t.lock.Unlock() - st.layers[snap.root] = snap + t.layers[snap.root] = snap return nil } // Cap traverses downwards the snapshot tree from a head block hash until the // number of allowed layers are crossed. All layers beyond the permitted number // are flattened downwards. -func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) error { +func (t *Tree) Cap(root common.Hash, layers int, memory uint64) error { // Retrieve the head snapshot to cap from - var snap snapshot - if s := st.Snapshot(blockRoot); s == nil { - return fmt.Errorf("snapshot [%#x] missing", blockRoot) - } else { - snap = s.(snapshot) + snap := t.Snapshot(root) + if snap == nil { + return fmt.Errorf("snapshot [%#x] missing", root) } diff, ok := snap.(*diffLayer) if !ok { - return fmt.Errorf("snapshot [%#x] is disk layer", blockRoot) + return fmt.Errorf("snapshot [%#x] is disk layer", root) } // Run the internal capping and discard all stale layers - st.lock.Lock() - defer st.lock.Unlock() + t.lock.Lock() + defer t.lock.Unlock() - var ( - diskNumber uint64 - diffNumber uint64 - ) // Flattening the bottom-most diff layer requires special casing since there's // no child to rewire to the grandparent. In that case we can fake a temporary // child for the capping and then remove it. 
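// Illustrative sketch (not part of the patch): what the blockRoot == parentRoot
// guard in Tree.Update prevents. If an empty block reused its parent's state root
// and a layer were still inserted, that layer would become its own ancestor and
// any walk up the parent chain (lookups, flattening, capping) would spin forever.
// The toy types below are invented; only the guard mirrors the real check.
package main

import (
	"errors"
	"fmt"
)

var errCycle = errors.New("snapshot cycle")

type toyLayer struct {
	root   string
	parent *toyLayer
}

type toyTree struct {
	layers map[string]*toyLayer
}

func (t *toyTree) Update(blockRoot, parentRoot string) error {
	if blockRoot == parentRoot {
		return errCycle // would make the new layer its own parent
	}
	parent, ok := t.layers[parentRoot]
	if !ok {
		return fmt.Errorf("parent %q snapshot missing", parentRoot)
	}
	t.layers[blockRoot] = &toyLayer{root: blockRoot, parent: parent}
	return nil
}

func main() {
	tree := &toyTree{layers: map[string]*toyLayer{"0x01": {root: "0x01"}}}
	fmt.Println(tree.Update("0x02", "0x01")) // <nil>
	fmt.Println(tree.Update("0x02", "0x02")) // snapshot cycle
}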
@@ -188,8 +198,9 @@ func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) er base := diffToDisk(diff.flatten().(*diffLayer)) diff.lock.RUnlock() - st.layers[base.root] = base - diskNumber, diffNumber = base.number, base.number + // Replace the entire snapshot tree with the flat base + t.layers = map[common.Hash]snapshot{base.root: base} + return nil case 1: // If full flattening was requested, flatten the diffs but only merge if the @@ -205,59 +216,74 @@ func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) er } diff.lock.RUnlock() + // If all diff layers were removed, replace the entire snapshot tree if base != nil { - st.layers[base.root] = base - diskNumber, diffNumber = base.number, base.number - } else { - st.layers[bottom.root] = bottom - diskNumber, diffNumber = bottom.parent.(*diskLayer).number, bottom.number + t.layers = map[common.Hash]snapshot{base.root: base} + return nil } + // Merge the new aggregated layer into the snapshot tree, clean stales below + t.layers[bottom.root] = bottom default: - diskNumber, diffNumber = st.cap(diff, layers, memory) + // Many layers requested to be retained, cap normally + t.cap(diff, layers, memory) } - for root, snap := range st.layers { - if number, _ := snap.Info(); number != diskNumber && number < diffNumber { - delete(st.layers, root) + // Remove any layer that is stale or links into a stale layer + children := make(map[common.Hash][]common.Hash) + for root, snap := range t.layers { + if diff, ok := snap.(*diffLayer); ok { + parent := diff.parent.Root() + children[parent] = append(children[parent], root) + } + } + var remove func(root common.Hash) + remove = func(root common.Hash) { + delete(t.layers, root) + for _, child := range children[root] { + remove(child) + } + delete(children, root) + } + for root, snap := range t.layers { + if snap.Stale() { + remove(root) } } return nil } // cap traverses downwards the diff tree until the number of allowed layers are -// crossed. All diffs beyond the permitted number are flattened downwards. If -// the layer limit is reached, memory cap is also enforced (but not before). The -// block numbers for the disk layer and first diff layer are returned for GC. -func (st *SnapshotTree) cap(diff *diffLayer, layers int, memory uint64) (uint64, uint64) { +// crossed. All diffs beyond the permitted number are flattened downwards. If the +// layer limit is reached, memory cap is also enforced (but not before). +func (t *Tree) cap(diff *diffLayer, layers int, memory uint64) { // Dive until we run out of layers or reach the persistent database for ; layers > 2; layers-- { // If we still have diff layers below, continue down if parent, ok := diff.parent.(*diffLayer); ok { diff = parent } else { - // Diff stack too shallow, return block numbers without modifications - return diff.parent.(*diskLayer).number, diff.number + // Diff stack too shallow, return without modifications + return } } // We're out of layers, flatten anything below, stopping if it's the disk or if // the memory limit is not yet exceeded. switch parent := diff.parent.(type) { case *diskLayer: - return parent.number, diff.number + return case *diffLayer: // Flatten the parent into the grandparent. The flattening internally obtains a // write lock on grandparent. 
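// Illustrative sketch (not part of the patch): the stale-layer cleanup that Cap
// performs after flattening. Build a parent -> children index over the layer map,
// then recursively drop every layer that is stale together with everything that
// still links into it. The types and the stale flag are toy stand-ins here.
package main

import "fmt"

type snapLayer struct {
	parent string // "" for a base layer
	stale  bool
}

func dropStale(layers map[string]*snapLayer) {
	children := make(map[string][]string)
	for root, l := range layers {
		if l.parent != "" {
			children[l.parent] = append(children[l.parent], root)
		}
	}
	var remove func(root string)
	remove = func(root string) {
		delete(layers, root)
		for _, child := range children[root] {
			remove(child)
		}
		delete(children, root)
	}
	for root, l := range layers {
		if l.stale {
			remove(root)
		}
	}
}

func main() {
	layers := map[string]*snapLayer{
		"disk": {stale: true},    // flattened across, now stale
		"a":    {parent: "disk"}, // links into the stale layer
		"b":    {parent: "a"},    // transitively stale
		"new":  {},               // the fresh base survives
	}
	dropStale(layers)
	fmt.Println(len(layers)) // 1
}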
flattened := parent.flatten().(*diffLayer) - st.layers[flattened.root] = flattened + t.layers[flattened.root] = flattened diff.lock.Lock() defer diff.lock.Unlock() diff.parent = flattened if flattened.memory < memory { - diskNumber, _ := flattened.parent.Info() - return diskNumber, flattened.number + return } default: panic(fmt.Sprintf("unknown data layer: %T", parent)) @@ -269,10 +295,8 @@ func (st *SnapshotTree) cap(diff *diffLayer, layers int, memory uint64) (uint64, base := diffToDisk(bottom) bottom.lock.RUnlock() - st.layers[base.root] = base + t.layers[base.root] = base diff.parent = base - - return base.number, diff.number } // diffToDisk merges a bottom-most diff into the persistent disk layer underneath @@ -284,7 +308,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { ) // Start by temporarily deleting the current snapshot block marker. This // ensures that in the case of a crash, the entire snapshot is invalidated. - rawdb.DeleteSnapshotBlock(batch) + rawdb.DeleteSnapshotRoot(batch) // Mark the original base as stale as we're going to create a new wrapper base.lock.Lock() @@ -341,13 +365,12 @@ func diffToDisk(bottom *diffLayer) *diskLayer { } } // Update the snapshot block marker and write any remainder data - rawdb.WriteSnapshotBlock(batch, bottom.number, bottom.root) + rawdb.WriteSnapshotRoot(batch, bottom.root) if err := batch.Write(); err != nil { log.Crit("Failed to write leftover snapshot", "err", err) } return &diskLayer{ root: bottom.root, - number: bottom.number, cache: base.cache, db: base.db, journal: base.journal, @@ -357,27 +380,25 @@ func diffToDisk(bottom *diffLayer) *diskLayer { // Journal commits an entire diff hierarchy to disk into a single journal file. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). -func (st *SnapshotTree) Journal(blockRoot common.Hash) error { - // Retrieve the head snapshot to journal from - var snap snapshot - if s := st.Snapshot(blockRoot); s == nil { +func (t *Tree) Journal(blockRoot common.Hash) error { + // Retrieve the head snapshot to journal from var snap snapshot + snap := t.Snapshot(blockRoot) + if snap == nil { return fmt.Errorf("snapshot [%#x] missing", blockRoot) - } else { - snap = s.(snapshot) } // Run the journaling - st.lock.Lock() - defer st.lock.Unlock() + t.lock.Lock() + defer t.lock.Unlock() - return snap.Journal() + return snap.(snapshot).Journal() } // loadSnapshot loads a pre-existing state snapshot backed by a key-value store. -func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (snapshot, error) { +func loadSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) (snapshot, error) { // Retrieve the block number and hash of the snapshot, failing if no snapshot // is present in the database (or crashed mid-update). - number, root := rawdb.ReadSnapshotBlock(db) - if root == (common.Hash{}) { + baseRoot := rawdb.ReadSnapshotRoot(db) + if baseRoot == (common.Hash{}) { return nil, errors.New("missing or corrupted snapshot") } cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup @@ -391,16 +412,14 @@ func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, hea journal: journal, db: db, cache: cache, - number: number, - root: root, + root: baseRoot, } // Load all the snapshot diffs from the journal, failing if their chain is broken // or does not lead from the disk snapshot to the specified head. 
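// Illustrative sketch (not part of the patch): the shape of diffToDisk. The old
// base is flagged stale (readers still holding it start seeing ErrSnapshotStale),
// the flattened diff's writes are pushed into the database and the clean cache,
// and a brand-new disk layer wrapper is returned. Everything here is a toy model
// with string keys; the real code batches writes and handles storage slots too.
package main

import "fmt"

type toyDisk struct {
	db    map[string][]byte
	cache map[string][]byte
	root  string
	stale bool
}

type toyDiff struct {
	parent   *toyDisk
	root     string
	accounts map[string][]byte // nil value means the account was deleted
}

func diffToDisk(bottom *toyDiff) *toyDisk {
	base := bottom.parent
	base.stale = true // anyone still holding the old base now sees a stale layer

	delete(base.db, "SnapshotRoot") // invalidate the marker before mutating data
	for key, data := range bottom.accounts {
		if data != nil {
			base.db[key] = data
			base.cache[key] = data
		} else {
			delete(base.db, key)
			base.cache[key] = nil // cache the deletion too
		}
	}
	base.db["SnapshotRoot"] = []byte(bottom.root) // marker goes in last

	return &toyDisk{db: base.db, cache: base.cache, root: bottom.root}
}

func main() {
	base := &toyDisk{db: map[string][]byte{}, cache: map[string][]byte{}, root: "0x01"}
	diff := &toyDiff{parent: base, root: "0x02", accounts: map[string][]byte{"acc": {0xaa}}}
	newBase := diffToDisk(diff)
	fmt.Println(newBase.root, base.stale) // 0x02 true
}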
if _, err := os.Stat(journal); os.IsNotExist(err) { // Journal doesn't exist, don't worry if it's not supposed to - if number != headNumber || root != headRoot { - return nil, fmt.Errorf("snapshot journal missing, head doesn't match snapshot: #%d [%#x] vs. #%d [%#x]", - headNumber, headRoot, number, root) + if baseRoot != root { + return nil, fmt.Errorf("snapshot journal missing, head doesn't match snapshot: have %#x, want %#x", baseRoot, root) } return base, nil } @@ -414,10 +433,8 @@ func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, hea } // Entire snapshot journal loaded, sanity check the head and return // Journal doesn't exist, don't worry if it's not supposed to - number, root = snapshot.Info() - if number != headNumber || root != headRoot { - return nil, fmt.Errorf("head doesn't match snapshot: #%d [%#x] vs. #%d [%#x]", - headNumber, headRoot, number, root) + if head := snapshot.Root(); head != root { + return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) } return snapshot, nil } diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 1551a71a2..40edf79e8 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -37,7 +37,7 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { root: common.HexToHash("0x01"), cache: cache, } - snaps := &SnapshotTree{ + snaps := &Tree{ layers: map[common.Hash]snapshot{ base.root: base, }, @@ -83,7 +83,7 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { root: common.HexToHash("0x01"), cache: cache, } - snaps := &SnapshotTree{ + snaps := &Tree{ layers: map[common.Hash]snapshot{ base.root: base, }, @@ -132,7 +132,7 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { root: common.HexToHash("0x01"), cache: cache, } - snaps := &SnapshotTree{ + snaps := &Tree{ layers: map[common.Hash]snapshot{ base.root: base, }, @@ -181,7 +181,7 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { root: common.HexToHash("0x01"), cache: cache, } - snaps := &SnapshotTree{ + snaps := &Tree{ layers: map[common.Hash]snapshot{ base.root: base, }, @@ -213,7 +213,6 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { if got := len(snaps.layers); got != exp { t.Errorf("layers modified, got %d exp %d", got, exp) } - // Flatten the diff layer into the bottom accumulator if err := snaps.Cap(common.HexToHash("0x04"), 2, 1024*1024); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) @@ -247,7 +246,7 @@ func TestPostCapBasicDataAccess(t *testing.T) { root: common.HexToHash("0x01"), cache: cache, } - snaps := &SnapshotTree{ + snaps := &Tree{ layers: map[common.Hash]snapshot{ base.root: base, }, diff --git a/core/state/statedb.go b/core/state/statedb.go index 7d7499892..f11bd2adb 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -68,7 +68,7 @@ type StateDB struct { db Database trie Trie - snaps *snapshot.SnapshotTree + snaps *snapshot.Tree snap snapshot.Snapshot snapAccounts map[common.Hash][]byte snapStorage map[common.Hash]map[common.Hash][]byte @@ -117,7 +117,7 @@ type StateDB struct { } // Create a new state from a given trie. 
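// Illustrative sketch (not part of the patch): the two consistency checks that
// loadSnapshot performs. Without a journal file the persisted base root must
// match the chain head directly; with a journal, the diff layers are replayed and
// the topmost root must match instead. The helper below is a toy with string
// roots and a pre-decoded journal, whereas the real code decodes RLP entries.
package main

import (
	"errors"
	"fmt"
)

// checkSnapshotHead reports whether a loaded snapshot can serve the given head.
// journalRoots holds the roots recovered from the journal, in bottom-up order.
func checkSnapshotHead(baseRoot, headRoot string, journalRoots []string) error {
	if len(journalRoots) == 0 {
		if baseRoot != headRoot {
			return fmt.Errorf("journal missing, head mismatch: have %s, want %s", baseRoot, headRoot)
		}
		return nil
	}
	if top := journalRoots[len(journalRoots)-1]; top != headRoot {
		return errors.New("journalled head doesn't match chain head")
	}
	return nil
}

func main() {
	fmt.Println(checkSnapshotHead("0x01", "0x01", nil))              // <nil>
	fmt.Println(checkSnapshotHead("0x01", "0x03", []string{"0x02"})) // error
}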
-func New(root common.Hash, db Database, snaps *snapshot.SnapshotTree) (*StateDB, error) { +func New(root common.Hash, db Database, snaps *snapshot.Tree) (*StateDB, error) { tr, err := db.OpenTrie(root) if err != nil { return nil, err @@ -840,12 +840,14 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if metrics.EnabledExpensive { defer func(start time.Time) { s.SnapshotCommits += time.Since(start) }(time.Now()) } - _, parentRoot := s.snap.Info() - if err := s.snaps.Update(root, parentRoot, s.snapAccounts, s.snapStorage); err != nil { - log.Warn("Failed to update snapshot tree", "from", parentRoot, "to", root, "err", err) - } - if err := s.snaps.Cap(root, 16, 4*1024*1024); err != nil { - log.Warn("Failed to cap snapshot tree", "root", root, "layers", 16, "memory", 4*1024*1024, "err", err) + // Only update if there's a state transition (skip empty Clique blocks) + if parent := s.snap.Root(); parent != root { + if err := s.snaps.Update(root, parent, s.snapAccounts, s.snapStorage); err != nil { + log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) + } + if err := s.snaps.Cap(root, 16, 4*1024*1024); err != nil { + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 16, "memory", 4*1024*1024, "err", err) + } } s.snap, s.snapAccounts, s.snapStorage = nil, nil, nil } From f300c0df01dde6b23f5fec4f1b2f91bc75f32c2f Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 25 Nov 2019 15:30:29 +0100 Subject: [PATCH 06/28] core/state/snapshot: replace bigcache with fastcache --- core/state/snapshot/disklayer.go | 14 ++++++-------- core/state/snapshot/generate.go | 10 ++-------- core/state/snapshot/snapshot.go | 22 +++++++--------------- core/state/snapshot/snapshot_test.go | 18 ++++++------------ 4 files changed, 21 insertions(+), 43 deletions(-) diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index f302652eb..474182f1d 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -19,7 +19,7 @@ package snapshot import ( "sync" - "github.com/allegro/bigcache" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" @@ -30,7 +30,7 @@ import ( type diskLayer struct { journal string // Path of the snapshot journal to use on shutdown db ethdb.KeyValueStore // Key-value store containing the base snapshot - cache *bigcache.BigCache // Cache to avoid hitting the disk for direct access + cache *fastcache.Cache // Cache to avoid hitting the disk for direct access root common.Hash // Root hash of the base snapshot stale bool // Signals that the layer became stale (state progressed) @@ -80,17 +80,15 @@ func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) { if dl.stale { return nil, ErrSnapshotStale } - key := string(hash[:]) - // Try to retrieve the account from the memory cache - if blob, err := dl.cache.Get(key); err == nil { + if blob := dl.cache.Get(nil, hash[:]); blob != nil { snapshotCleanHitMeter.Mark(1) snapshotCleanReadMeter.Mark(int64(len(blob))) return blob, nil } // Cache doesn't contain account, pull from disk and cache for later blob := rawdb.ReadAccountSnapshot(dl.db, hash) - dl.cache.Set(key, blob) + dl.cache.Set(hash[:], blob) snapshotCleanMissMeter.Mark(1) snapshotCleanWriteMeter.Mark(int64(len(blob))) @@ -109,10 +107,10 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro if dl.stale { return nil, 
ErrSnapshotStale } - key := string(append(accountHash[:], storageHash[:]...)) + key := append(accountHash[:], storageHash[:]...) // Try to retrieve the storage slot from the memory cache - if blob, err := dl.cache.Get(key); err == nil { + if blob := dl.cache.Get(nil, key); blob != nil { snapshotCleanHitMeter.Mark(1) snapshotCleanReadMeter.Mark(int64(len(blob))) return blob, nil diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index a2197557e..445a6ebd9 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -22,7 +22,7 @@ import ( "math/big" "time" - "github.com/allegro/bigcache" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" @@ -196,13 +196,7 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) return nil, err } // New snapshot generated, construct a brand new base layer - cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup - Shards: 1024, - LifeWindow: time.Hour, - MaxEntriesInWindow: 512 * 1024, - MaxEntrySize: 512, - HardMaxCacheSize: 512, - }) + cache := fastcache.New(512 * 1024 * 1024) return &diskLayer{ journal: journal, db: db, diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index edca5781d..d35d69839 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -22,9 +22,8 @@ import ( "fmt" "os" "sync" - "time" - "github.com/allegro/bigcache" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" @@ -323,7 +322,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { if len(data) > 0 { // Account was updated, push to disk rawdb.WriteAccountSnapshot(batch, hash, data) - base.cache.Set(string(hash[:]), data) + base.cache.Set(hash[:], data) if batch.ValueSize() > ethdb.IdealBatchSize { if err := batch.Write(); err != nil { @@ -334,13 +333,13 @@ func diffToDisk(bottom *diffLayer) *diskLayer { } else { // Account was deleted, remove all storage slots too rawdb.DeleteAccountSnapshot(batch, hash) - base.cache.Set(string(hash[:]), nil) + base.cache.Set(hash[:], nil) it := rawdb.IterateStorageSnapshots(base.db, hash) for it.Next() { if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator batch.Delete(key) - base.cache.Delete(string(key[1:])) + base.cache.Del(key[1:]) } } it.Release() @@ -351,10 +350,10 @@ func diffToDisk(bottom *diffLayer) *diskLayer { for storageHash, data := range storage { if len(data) > 0 { rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) - base.cache.Set(string(append(accountHash[:], storageHash[:]...)), data) + base.cache.Set(append(accountHash[:], storageHash[:]...), data) } else { rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) - base.cache.Set(string(append(accountHash[:], storageHash[:]...)), nil) + base.cache.Set(append(accountHash[:], storageHash[:]...), nil) } } if batch.ValueSize() > ethdb.IdealBatchSize { @@ -401,17 +400,10 @@ func loadSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) (sna if baseRoot == (common.Hash{}) { return nil, errors.New("missing or corrupted snapshot") } - cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup - Shards: 1024, - LifeWindow: time.Hour, - MaxEntriesInWindow: 512 * 1024, - MaxEntrySize: 512, - HardMaxCacheSize: 512, - }) 
base := &diskLayer{ journal: journal, db: db, - cache: cache, + cache: fastcache.New(512 * 1024 * 1024), root: baseRoot, } // Load all the snapshot diffs from the journal, failing if their chain is broken diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 40edf79e8..9c872a895 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -19,9 +19,8 @@ package snapshot import ( "fmt" "testing" - "time" - "github.com/allegro/bigcache" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" ) @@ -31,11 +30,10 @@ import ( // to check internal corner case around the bottom-most memory accumulator. func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it - cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) base := &diskLayer{ db: rawdb.NewMemoryDatabase(), root: common.HexToHash("0x01"), - cache: cache, + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -77,11 +75,10 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { // layer to check the usual mode of operation where the accumulator is retained. func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it - cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) base := &diskLayer{ db: rawdb.NewMemoryDatabase(), root: common.HexToHash("0x01"), - cache: cache, + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -126,11 +123,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { // to check internal corner case around the bottom-most memory accumulator. func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it - cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) base := &diskLayer{ db: rawdb.NewMemoryDatabase(), root: common.HexToHash("0x01"), - cache: cache, + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -175,11 +171,10 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { // layer to check the usual mode of operation where the accumulator is retained. 
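// Illustrative sketch (not part of the patch): the fastcache calls the new disk
// layer relies on, shown standalone. Unlike bigcache, fastcache keys and values
// are plain byte slices and Get takes an optional destination buffer to append
// into; passing nil simply allocates. The 512 MiB size below just mirrors the
// constant used in the patch, it is not required.
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/fastcache"
)

func main() {
	cache := fastcache.New(512 * 1024 * 1024) // capacity in bytes

	key := []byte("account-hash")
	cache.Set(key, []byte{0x0a, 0x0b})

	if blob := cache.Get(nil, key); blob != nil { // hit: non-nil result
		fmt.Printf("cached: %x\n", blob)
	}

	cache.Del(key)
	fmt.Println("after delete:", cache.Get(nil, key))
}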
func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it - cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) base := &diskLayer{ db: rawdb.NewMemoryDatabase(), root: common.HexToHash("0x01"), - cache: cache, + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -240,11 +235,10 @@ func TestPostCapBasicDataAccess(t *testing.T) { } } // Create a starting base layer and a snapshot tree out of it - cache, _ := bigcache.NewBigCache(bigcache.DefaultConfig(time.Minute)) base := &diskLayer{ db: rawdb.NewMemoryDatabase(), root: common.HexToHash("0x01"), - cache: cache, + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ From 351a5903b0ccb9c77b5f0983fdd17c3d4de7acf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 26 Nov 2019 09:48:29 +0200 Subject: [PATCH 07/28] core/rawdb, core/state/snapshot: runtime snapshot generation --- cmd/geth/main.go | 1 + cmd/geth/usage.go | 1 + cmd/utils/flags.go | 13 +- core/blockchain.go | 24 +- core/rawdb/database.go | 4 +- core/rawdb/schema.go | 19 +- core/state/snapshot/difflayer.go | 206 ++++++++- core/state/snapshot/difflayer_journal.go | 137 ------ core/state/snapshot/difflayer_test.go | 54 +-- core/state/snapshot/disklayer.go | 57 ++- core/state/snapshot/disklayer_test.go | 433 ++++++++++++++++++ core/state/snapshot/generate.go | 330 +++++++------ core/state/snapshot/journal.go | 257 +++++++++++ core/state/snapshot/snapshot.go | 270 ++++++++--- core/state/snapshot/snapshot_test.go | 49 +- core/state/snapshot/wipe.go | 130 ++++++ .../{generate_test.go => wipe_test.go} | 38 +- core/state/statedb.go | 4 +- eth/backend.go | 6 +- eth/config.go | 2 + trie/iterator.go | 2 - 21 files changed, 1551 insertions(+), 486 deletions(-) delete mode 100644 core/state/snapshot/difflayer_journal.go create mode 100644 core/state/snapshot/disklayer_test.go create mode 100644 core/state/snapshot/journal.go create mode 100644 core/state/snapshot/wipe.go rename core/state/snapshot/{generate_test.go => wipe_test.go} (77%) diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 99ef78238..36187e484 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -106,6 +106,7 @@ var ( utils.CacheDatabaseFlag, utils.CacheTrieFlag, utils.CacheGCFlag, + utils.CacheSnapshotFlag, utils.CacheNoPrefetchFlag, utils.ListenPortFlag, utils.MaxPeersFlag, diff --git a/cmd/geth/usage.go b/cmd/geth/usage.go index 6f3197b9c..f2f3b5756 100644 --- a/cmd/geth/usage.go +++ b/cmd/geth/usage.go @@ -137,6 +137,7 @@ var AppHelpFlagGroups = []flagGroup{ utils.CacheDatabaseFlag, utils.CacheTrieFlag, utils.CacheGCFlag, + utils.CacheSnapshotFlag, utils.CacheNoPrefetchFlag, }, }, diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index bdadebd85..22fe677fa 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -383,14 +383,19 @@ var ( } CacheTrieFlag = cli.IntFlag{ Name: "cache.trie", - Usage: "Percentage of cache memory allowance to use for trie caching (default = 25% full mode, 50% archive mode)", - Value: 25, + Usage: "Percentage of cache memory allowance to use for trie caching (default = 15% full mode, 30% archive mode)", + Value: 15, } CacheGCFlag = cli.IntFlag{ Name: "cache.gc", Usage: "Percentage of cache memory allowance to use for trie pruning (default = 25% full mode, 0% archive mode)", Value: 25, } + CacheSnapshotFlag = cli.IntFlag{ + Name: "cache.snapshot", + Usage: "Percentage of cache memory allowance to use for 
snapshot caching (default = 10% full mode, 20% archive mode)", + Value: 10, + } CacheNoPrefetchFlag = cli.BoolFlag{ Name: "cache.noprefetch", Usage: "Disable heuristic state prefetch during block import (less CPU and disk IO, more time waiting for data)", @@ -1463,6 +1468,9 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *eth.Config) { if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheGCFlag.Name) { cfg.TrieDirtyCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheGCFlag.Name) / 100 } + if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheSnapshotFlag.Name) { + cfg.SnapshotCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheSnapshotFlag.Name) / 100 + } if ctx.GlobalIsSet(DocRootFlag.Name) { cfg.DocRoot = ctx.GlobalString(DocRootFlag.Name) } @@ -1724,6 +1732,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node) (chain *core.BlockChain, chai TrieDirtyLimit: eth.DefaultConfig.TrieDirtyCache, TrieDirtyDisabled: ctx.GlobalString(GCModeFlag.Name) == "archive", TrieTimeLimit: eth.DefaultConfig.TrieTimeout, + SnapshotLimit: eth.DefaultConfig.SnapshotCache, } if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheTrieFlag.Name) { cache.TrieCleanLimit = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheTrieFlag.Name) / 100 diff --git a/core/blockchain.go b/core/blockchain.go index 6fb722d2d..3932baf55 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -62,8 +62,8 @@ var ( storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) - snapshotAccountReadTimer = metrics.NewRegisteredTimer("chain/snapshot/accountreads", nil) - snapshotStorageReadTimer = metrics.NewRegisteredTimer("chain/snapshot/storagereads", nil) + snapshotAccountReadTimer = metrics.NewRegisteredTimer("chain/snapshot/account/reads", nil) + snapshotStorageReadTimer = metrics.NewRegisteredTimer("chain/snapshot/storage/reads", nil) snapshotCommitTimer = metrics.NewRegisteredTimer("chain/snapshot/commits", nil) blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) @@ -120,6 +120,7 @@ type CacheConfig struct { TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk TrieDirtyDisabled bool // Whether to disable trie write caching and GC altogether (archive node) TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk + SnapshotLimit int // Memory allowance (MB) to use for caching snapshot entries in memory } // BlockChain represents the canonical chain given a database with a genesis @@ -194,6 +195,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par TrieCleanLimit: 256, TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 256, } } bodyCache, _ := lru.New(bodyCacheLimit) @@ -300,10 +302,8 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } } // Load any existing snapshot, regenerating it if loading failed - head := bc.CurrentBlock() - if bc.snaps, err = snapshot.New(bc.db, "snapshot.rlp", head.Root()); err != nil { - return nil, err - } + bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), "snapshot.rlp", bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root()) + // Take ownership of this particular state go bc.update() return bc, nil @@ -497,6 +497,9 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error { headBlockGauge.Update(int64(block.NumberU64())) bc.chainmu.Unlock() + // Destroy any existing 
state snapshot and regenerate it in the background + bc.snaps.Rebuild(block.Root()) + log.Info("Committed new head block", "number", block.Number(), "hash", hash) return nil } @@ -851,7 +854,8 @@ func (bc *BlockChain) Stop() { bc.wg.Wait() // Ensure that the entirety of the state snapshot is journalled to disk. - if err := bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { + snapBase, err := bc.snaps.Journal(bc.CurrentBlock().Root(), "snapshot.rlp") + if err != nil { log.Error("Failed to journal state snapshot", "err", err) } // Ensure the state of a recent block is also stored to disk before exiting. @@ -872,6 +876,12 @@ func (bc *BlockChain) Stop() { } } } + if snapBase != (common.Hash{}) { + log.Info("Writing snapshot state to disk", "root", snapBase) + if err := triedb.Commit(snapBase, true); err != nil { + log.Error("Failed to commit recent state trie", "err", err) + } + } for !bc.triegc.Empty() { triedb.Dereference(bc.triegc.PopItem().(common.Hash)) } diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 7abd07359..b74d8e2e3 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -282,9 +282,9 @@ func InspectDatabase(db ethdb.Database) error { receiptSize += size case bytes.HasPrefix(key, txLookupPrefix) && len(key) == (len(txLookupPrefix)+common.HashLength): txlookupSize += size - case bytes.HasPrefix(key, StateSnapshotPrefix) && len(key) == (len(StateSnapshotPrefix)+common.HashLength): + case bytes.HasPrefix(key, SnapshotAccountPrefix) && len(key) == (len(SnapshotAccountPrefix)+common.HashLength): accountSnapSize += size - case bytes.HasPrefix(key, StateSnapshotPrefix) && len(key) == (len(StateSnapshotPrefix)+2*common.HashLength): + case bytes.HasPrefix(key, SnapshotStoragePrefix) && len(key) == (len(SnapshotStoragePrefix)+2*common.HashLength): storageSnapSize += size case bytes.HasPrefix(key, preimagePrefix) && len(key) == (len(preimagePrefix)+common.HashLength): preimageSize += size diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index d20658792..1b8e53eb6 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -53,9 +53,10 @@ var ( blockBodyPrefix = []byte("b") // blockBodyPrefix + num (uint64 big endian) + hash -> block body blockReceiptsPrefix = []byte("r") // blockReceiptsPrefix + num (uint64 big endian) + hash -> block receipts - txLookupPrefix = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata - bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits - StateSnapshotPrefix = []byte("s") // StateSnapshotPrefix + account hash [+ storage hash] -> account/storage trie value + txLookupPrefix = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata + bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits + SnapshotAccountPrefix = []byte("a") // SnapshotAccountPrefix + account hash -> account trie value + SnapshotStoragePrefix = []byte("s") // SnapshotStoragePrefix + account hash + storage hash -> storage trie value preimagePrefix = []byte("secure-key-") // preimagePrefix + hash -> preimage configPrefix = []byte("ethereum-config-") // config prefix for the db @@ -149,19 +150,19 @@ func txLookupKey(hash common.Hash) []byte { return append(txLookupPrefix, hash.Bytes()...) 
} -// accountSnapshotKey = StateSnapshotPrefix + hash +// accountSnapshotKey = SnapshotAccountPrefix + hash func accountSnapshotKey(hash common.Hash) []byte { - return append(StateSnapshotPrefix, hash.Bytes()...) + return append(SnapshotAccountPrefix, hash.Bytes()...) } -// storageSnapshotKey = StateSnapshotPrefix + account hash + storage hash +// storageSnapshotKey = SnapshotStoragePrefix + account hash + storage hash func storageSnapshotKey(accountHash, storageHash common.Hash) []byte { - return append(append(StateSnapshotPrefix, accountHash.Bytes()...), storageHash.Bytes()...) + return append(append(SnapshotStoragePrefix, accountHash.Bytes()...), storageHash.Bytes()...) } -// storageSnapshotsKey = StateSnapshotPrefix + account hash + storage hash +// storageSnapshotsKey = SnapshotStoragePrefix + account hash + storage hash func storageSnapshotsKey(accountHash common.Hash) []byte { - return append(StateSnapshotPrefix, accountHash.Bytes()...) + return append(SnapshotStoragePrefix, accountHash.Bytes()...) } // bloomBitsKey = bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 7e8487ea8..0743e4759 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -17,13 +17,52 @@ package snapshot import ( + "encoding/binary" "fmt" + "math" "sort" "sync" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" + "github.com/steakknife/bloomfilter" +) + +var ( + // aggregatorMemoryLimit is the maximum size of the bottom-most diff layer + // that aggregates the writes from above until it's flushed into the disk + // layer. + // + // Note, bumping this up might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + aggregatorMemoryLimit = uint64(4 * 1024 * 1024) + + // aggregatorItemLimit is an approximate number of items that will end up + // in the agregator layer before it's flushed out to disk. A plain account + // weighs around 14B (+hash), a storage slot 32B (+hash), so 50 is a very + // rough average of what we might see. + aggregatorItemLimit = aggregatorMemoryLimit / 55 + + // bloomTargetError is the target false positive rate when the aggregator + // layer is at its fullest. The actual value will probably move around up + // and down from this number, it's mostly a ballpark figure. + // + // Note, dropping this down might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + bloomTargetError = 0.02 + + // bloomSize is the ideal bloom filter size given the maximum number of items + // it's expected to hold and the target false positive error rate. + bloomSize = math.Ceil(float64(aggregatorItemLimit) * math.Log(bloomTargetError) / math.Log(1/math.Pow(2, math.Log(2)))) + + // bloomFuncs is the ideal number of bits a single entry should set in the + // bloom filter to keep its size to a minimum (given it's size and maximum + // entry count). 
+ bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2)) ) // diffLayer represents a collection of modifications made to a state snapshot @@ -33,8 +72,9 @@ import ( // The goal of a diff layer is to act as a journal, tracking recent modifications // made to the state, that have not yet graduated into a semi-immutable state. type diffLayer struct { - parent snapshot // Parent snapshot modified by this one, never nil - memory uint64 // Approximate guess as to how much memory we use + origin *diskLayer // Base disk layer to directly use on bloom misses + parent snapshot // Parent snapshot modified by this one, never nil + memory uint64 // Approximate guess as to how much memory we use root common.Hash // Root hash to which this snapshot diff belongs to stale bool // Signals that the layer became stale (state progressed) @@ -44,9 +84,39 @@ type diffLayer struct { storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted) + diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer + lock sync.RWMutex } +// accountBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert an account +// hash into a 64 bit mini hash. +type accountBloomHasher common.Hash + +func (h accountBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h accountBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h accountBloomHasher) Reset() { panic("not implemented") } +func (h accountBloomHasher) BlockSize() int { panic("not implemented") } +func (h accountBloomHasher) Size() int { return 8 } +func (h accountBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[:8]) +} + +// storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert an account +// hash into a 64 bit mini hash. +type storageBloomHasher [2]common.Hash + +func (h storageBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h storageBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h storageBloomHasher) Reset() { panic("not implemented") } +func (h storageBloomHasher) BlockSize() int { panic("not implemented") } +func (h storageBloomHasher) Size() int { return 8 } +func (h storageBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[0][:8]) ^ binary.BigEndian.Uint64(h[1][:8]) +} + // newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low // level persistent database or a hierarchical diff already. 
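// Illustrative sketch (not part of the patch): re-deriving the bloom filter
// dimensions from the constants above, using the standard optimal-size formulas
// the patch encodes (m = -n*ln(p)/(ln 2)^2, written slightly differently, and
// k = m/n * ln 2). For a 4 MiB aggregator and a 2% target error this works out
// to roughly 620k bits (~76 KiB) and about 6 hash functions per entry.
package main

import (
	"fmt"
	"math"
)

func main() {
	aggregatorMemoryLimit := uint64(4 * 1024 * 1024) // bottom diff layer budget
	aggregatorItemLimit := aggregatorMemoryLimit / 55 // ~55 bytes per average item
	bloomTargetError := 0.02                          // acceptable false positive rate

	// Ideal filter size in bits for n items at error rate p.
	bloomSize := math.Ceil(float64(aggregatorItemLimit) * math.Log(bloomTargetError) /
		math.Log(1/math.Pow(2, math.Log(2))))

	// Ideal number of hash functions for that size and item count.
	bloomFuncs := math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2))

	fmt.Printf("items=%d bits=%.0f (%.1f KiB) hashes=%.0f\n",
		aggregatorItemLimit, bloomSize, bloomSize/8/1024, bloomFuncs)
}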
func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { @@ -57,9 +127,18 @@ func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][] accountData: accounts, storageData: storage, } - // Determine mem size + switch parent := parent.(type) { + case *diskLayer: + dl.rebloom(parent) + case *diffLayer: + dl.rebloom(parent.origin) + default: + panic("unknown parent type") + } + // Determine memory size and track the dirty writes for _, data := range accounts { - dl.memory += uint64(len(data)) + dl.memory += uint64(common.HashLength + len(data)) + snapshotDirtyAccountWriteMeter.Mark(int64(len(data))) } // Fill the storage hashes and sort them for the iterator dl.storageList = make(map[common.Hash][]common.Hash) @@ -80,16 +159,56 @@ func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][] if account, ok := accounts[accountHash]; account == nil || !ok { log.Error(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) } - // Determine mem size + // Determine memory size and track the dirty writes for _, data := range slots { - dl.memory += uint64(len(data)) + dl.memory += uint64(common.HashLength + len(data)) + snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) } } dl.memory += uint64(len(dl.storageList) * common.HashLength) - return dl } +// rebloom discards the layer's current bloom and rebuilds it from scratch based +// on the parent's and the local diffs. +func (dl *diffLayer) rebloom(origin *diskLayer) { + dl.lock.Lock() + defer dl.lock.Unlock() + + defer func(start time.Time) { + snapshotBloomIndexTimer.Update(time.Since(start)) + }(time.Now()) + + // Inject the new origin that triggered the rebloom + dl.origin = origin + + // Retrieve the parent bloom or create a fresh empty one + if parent, ok := dl.parent.(*diffLayer); ok { + parent.lock.RLock() + dl.diffed, _ = parent.diffed.Copy() + parent.lock.RUnlock() + } else { + dl.diffed, _ = bloomfilter.New(uint64(bloomSize), uint64(bloomFuncs)) + } + // Iterate over all the accounts and storage slots and index them + for hash := range dl.accountData { + dl.diffed.Add(accountBloomHasher(hash)) + } + for accountHash, slots := range dl.storageData { + for storageHash := range slots { + dl.diffed.Add(storageBloomHasher{accountHash, storageHash}) + } + } + // Calculate the current false positive rate and update the error rate meter. + // This is a bit cheating because subsequent layers will overwrite it, but it + // should be fine, we're only interested in ballpark figures. + k := float64(dl.diffed.K()) + n := float64(dl.diffed.N()) + m := float64(dl.diffed.M()) + + snapshotBloomErrorGauge.Update(math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)) +} + // Root returns the root hash for which this snapshot was made. func (dl *diffLayer) Root() common.Hash { return dl.root @@ -124,6 +243,26 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { // AccountRLP directly retrieves the account RLP associated with a particular // hash in the snapshot slim data format. 
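// Illustrative sketch (not part of the patch): the false positive estimate that
// rebloom feeds into snapshotBloomErrorGauge. For a filter with k hash functions,
// m bits and n inserted items the expected error rate is (1 - e^(-k(n+0.5)/(m-1)))^k,
// which is what the expression in rebloom computes. The numbers below are the
// ballpark dimensions derived from the constants earlier in this file.
package main

import (
	"fmt"
	"math"
)

// bloomError estimates the false positive probability of a bloom filter.
func bloomError(k, n, m float64) float64 {
	return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
}

func main() {
	// Roughly the aggregator-full case: ~76k items in a ~620k bit filter, 6 hashes.
	fmt.Printf("estimated error: %.4f\n", bloomError(6, 76260, 620940)) // ≈ 0.02, the 2% target
}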
func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { + // Check the bloom filter first whether there's even a point in reaching into + // all the maps in all the layers below + dl.lock.RLock() + hit := dl.diffed.Contains(accountBloomHasher(hash)) + dl.lock.RUnlock() + + // If the bloom filter misses, don't even bother with traversing the memory + // diff layers, reach straight into the bottom persistent disk layer + if !hit { + snapshotBloomAccountMissMeter.Mark(1) + return dl.origin.AccountRLP(hash) + } + // The bloom filter hit, start poking in the internal maps + return dl.accountRLP(hash) +} + +// accountRLP is an internal version of AccountRLP that skips the bloom filter +// checks and uses the internal maps to try and retrieve the data. It's meant +// to be used if a higher layer's bloom filter hit already. +func (dl *diffLayer) accountRLP(hash common.Hash) ([]byte, error) { dl.lock.RLock() defer dl.lock.RUnlock() @@ -135,9 +274,17 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { // If the account is known locally, return it. Note, a nil account means it was // deleted, and is a different notion than an unknown account! if data, ok := dl.accountData[hash]; ok { + snapshotDirtyAccountHitMeter.Mark(1) + snapshotDirtyAccountReadMeter.Mark(int64(len(data))) + snapshotBloomAccountTrueHitMeter.Mark(1) return data, nil } // Account unknown to this diff, resolve from parent + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.accountRLP(hash) + } + // Failed to resolve through diff layers, mark a bloom error and use the disk + snapshotBloomAccountFalseHitMeter.Mark(1) return dl.parent.AccountRLP(hash) } @@ -145,6 +292,26 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { // within a particular account. If the slot is unknown to this diff, it's parent // is consulted. func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + // Check the bloom filter first whether there's even a point in reaching into + // all the maps in all the layers below + dl.lock.RLock() + hit := dl.diffed.Contains(storageBloomHasher{accountHash, storageHash}) + dl.lock.RUnlock() + + // If the bloom filter misses, don't even bother with traversing the memory + // diff layers, reach straight into the bottom persistent disk layer + if !hit { + snapshotBloomStorageMissMeter.Mark(1) + return dl.origin.Storage(accountHash, storageHash) + } + // The bloom filter hit, start poking in the internal maps + return dl.storage(accountHash, storageHash) +} + +// storage is an internal version of Storage that skips the bloom filter checks +// and uses the internal maps to try and retrieve the data. It's meant to be +// used if a higher layer's bloom filter hit already. +func (dl *diffLayer) storage(accountHash, storageHash common.Hash) ([]byte, error) { dl.lock.RLock() defer dl.lock.RUnlock() @@ -157,13 +324,23 @@ func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro // account means it was deleted, and is a different notion than an unknown account! 
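// Illustrative sketch (not part of the patch): the read path that the bloom filter
// buys. A miss in the filter proves the key was never touched by any diff layer,
// so the lookup can jump straight to the bottom disk layer instead of walking
// every in-memory map; only on a hit (which may be a false positive) are the diff
// maps consulted. The types below are toys, and the "bloom" is a plain map rather
// than a probabilistic filter.
package main

import "fmt"

type diskStore map[string][]byte

type diffOverlay struct {
	parent interface{ lookup(string) []byte } // another diffOverlay or the disk store
	origin diskStore                           // bottom disk store, used on bloom misses
	bloom  map[string]bool                     // stand-in for the probabilistic filter
	data   map[string][]byte
}

func (d diskStore) lookup(key string) []byte { return d[key] }

func (d *diffOverlay) lookup(key string) []byte {
	if !d.bloom[key] { // filter miss: key cannot be in any diff layer
		return d.origin.lookup(key)
	}
	if v, ok := d.data[key]; ok {
		return v
	}
	return d.parent.lookup(key) // hit but not here: keep walking down
}

func main() {
	disk := diskStore{"cold": {0x01}}
	layer := &diffOverlay{
		parent: disk,
		origin: disk,
		bloom:  map[string]bool{"hot": true},
		data:   map[string][]byte{"hot": {0x02}},
	}
	fmt.Printf("hot=%x cold=%x\n", layer.lookup("hot"), layer.lookup("cold"))
}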
if storage, ok := dl.storageData[accountHash]; ok { if storage == nil { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotBloomStorageTrueHitMeter.Mark(1) return nil, nil } if data, ok := storage[storageHash]; ok { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageReadMeter.Mark(int64(len(data))) + snapshotBloomStorageTrueHitMeter.Mark(1) return data, nil } } - // Account - or slot within - unknown to this diff, resolve from parent + // Storage slot unknown to this diff, resolve from parent + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.storage(accountHash, storageHash) + } + // Failed to resolve through diff layers, mark a bloom error and use the disk + snapshotBloomStorageFalseHitMeter.Mark(1) return dl.parent.Storage(accountHash, storageHash) } @@ -224,22 +401,11 @@ func (dl *diffLayer) flatten() snapshot { storageData: parent.storageData, accountList: parent.accountList, accountData: parent.accountData, + diffed: dl.diffed, memory: parent.memory + dl.memory, } } -// Journal commits an entire diff hierarchy to disk into a single journal file. -// This is meant to be used during shutdown to persist the snapshot without -// flattening everything down (bad for reorgs). -func (dl *diffLayer) Journal() error { - writer, err := dl.journal() - if err != nil { - return err - } - writer.Close() - return nil -} - // AccountList returns a sorted list of all accounts in this difflayer. func (dl *diffLayer) AccountList() []common.Hash { dl.lock.Lock() diff --git a/core/state/snapshot/difflayer_journal.go b/core/state/snapshot/difflayer_journal.go deleted file mode 100644 index 5490531be..000000000 --- a/core/state/snapshot/difflayer_journal.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2019 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package snapshot - -import ( - "bufio" - "fmt" - "io" - "os" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/rlp" -) - -// journalAccount is an account entry in a diffLayer's disk journal. -type journalAccount struct { - Hash common.Hash - Blob []byte -} - -// journalStorage is an account's storage map in a diffLayer's disk journal. -type journalStorage struct { - Hash common.Hash - Keys []common.Hash - Vals [][]byte -} - -// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new -// diff and verifying that it can be linked to the requested parent. 
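The deletion handling above relies on a Go map subtlety worth spelling out: a key stored with a nil value (an explicit deletion recorded in this layer) is different from an absent key (unknown here, so the lookup falls through to the parent). A tiny standalone illustration with hypothetical keys:

package main

import "fmt"

func main() {
	// Hypothetical account blobs: a nil value marks a deletion recorded in this
	// layer, a missing key means the layer knows nothing and the lookup must
	// fall through to its parent.
	data := map[string][]byte{
		"0xaa": nil,          // present but deleted
		"0xbb": {0x01, 0x02}, // present with data
	}
	for _, key := range []string{"0xaa", "0xbb", "0xcc"} {
		blob, ok := data[key]
		fmt.Printf("%s: known=%t deleted=%t\n", key, ok, ok && blob == nil)
	}
}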
-func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { - // Read the next diff journal entry - var root common.Hash - if err := r.Decode(&root); err != nil { - // The first read may fail with EOF, marking the end of the journal - if err == io.EOF { - return parent, nil - } - return nil, fmt.Errorf("load diff root: %v", err) - } - var accounts []journalAccount - if err := r.Decode(&accounts); err != nil { - return nil, fmt.Errorf("load diff accounts: %v", err) - } - accountData := make(map[common.Hash][]byte) - for _, entry := range accounts { - accountData[entry.Hash] = entry.Blob - } - var storage []journalStorage - if err := r.Decode(&storage); err != nil { - return nil, fmt.Errorf("load diff storage: %v", err) - } - storageData := make(map[common.Hash]map[common.Hash][]byte) - for _, entry := range storage { - slots := make(map[common.Hash][]byte) - for i, key := range entry.Keys { - slots[key] = entry.Vals[i] - } - storageData[entry.Hash] = slots - } - return loadDiffLayer(newDiffLayer(parent, root, accountData, storageData), r) -} - -// journal is the internal version of Journal that also returns the journal file -// so subsequent layers know where to write to. -func (dl *diffLayer) journal() (io.WriteCloser, error) { - // If we've reached the bottom, open the journal - var writer io.WriteCloser - if parent, ok := dl.parent.(*diskLayer); ok { - file, err := os.Create(parent.journal) - if err != nil { - return nil, err - } - writer = file - } - // If we haven't reached the bottom yet, journal the parent first - if writer == nil { - file, err := dl.parent.(*diffLayer).journal() - if err != nil { - return nil, err - } - writer = file - } - dl.lock.RLock() - defer dl.lock.RUnlock() - - if dl.stale { - writer.Close() - return nil, ErrSnapshotStale - } - // Everything below was journalled, persist this layer too - buf := bufio.NewWriter(writer) - if err := rlp.Encode(buf, dl.root); err != nil { - buf.Flush() - writer.Close() - return nil, err - } - accounts := make([]journalAccount, 0, len(dl.accountData)) - for hash, blob := range dl.accountData { - accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) - } - if err := rlp.Encode(buf, accounts); err != nil { - buf.Flush() - writer.Close() - return nil, err - } - storage := make([]journalStorage, 0, len(dl.storageData)) - for hash, slots := range dl.storageData { - keys := make([]common.Hash, 0, len(slots)) - vals := make([][]byte, 0, len(slots)) - for key, val := range slots { - keys = append(keys, key) - vals = append(vals, val) - } - storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) - } - if err := rlp.Encode(buf, storage); err != nil { - buf.Flush() - writer.Close() - return nil, err - } - buf.Flush() - return writer, nil -} diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 7cd1e8062..9029bb04b 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -24,7 +24,9 @@ import ( "path" "testing" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" ) @@ -61,7 +63,7 @@ func TestMergeBasics(t *testing.T) { } } // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) + parent := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) child := newDiffLayer(parent, common.Hash{}, accounts, storage) child = newDiffLayer(child, 
common.Hash{}, accounts, storage) child = newDiffLayer(child, common.Hash{}, accounts, storage) @@ -122,7 +124,7 @@ func TestMergeDelete(t *testing.T) { } // Add some flip-flopping layers on top - parent := newDiffLayer(emptyLayer{}, common.Hash{}, flip(), storage) + parent := newDiffLayer(emptyLayer(), common.Hash{}, flip(), storage) child := parent.Update(common.Hash{}, flop(), storage) child = child.Update(common.Hash{}, flip(), storage) child = child.Update(common.Hash{}, flop(), storage) @@ -165,7 +167,7 @@ func TestInsertAndMerge(t *testing.T) { { var accounts = make(map[common.Hash][]byte) var storage = make(map[common.Hash]map[common.Hash][]byte) - parent = newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) + parent = newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) } { var accounts = make(map[common.Hash][]byte) @@ -186,34 +188,11 @@ func TestInsertAndMerge(t *testing.T) { } } -type emptyLayer struct{} - -func (emptyLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { - panic("implement me") -} - -func (emptyLayer) Journal() error { - panic("implement me") -} - -func (emptyLayer) Stale() bool { - panic("implement me") -} - -func (emptyLayer) Root() common.Hash { - return common.Hash{} -} - -func (emptyLayer) Account(hash common.Hash) (*Account, error) { - return nil, nil -} - -func (emptyLayer) AccountRLP(hash common.Hash) ([]byte, error) { - return nil, nil -} - -func (emptyLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { - return nil, nil +func emptyLayer() *diskLayer { + return &diskLayer{ + diskdb: memorydb.New(), + cache: fastcache.New(500 * 1024), + } } // BenchmarkSearch checks how long it takes to find a non-existing key @@ -234,7 +213,7 @@ func BenchmarkSearch(b *testing.B) { return newDiffLayer(parent, common.Hash{}, accounts, storage) } var layer snapshot - layer = emptyLayer{} + layer = emptyLayer() for i := 0; i < 128; i++ { layer = fill(layer) } @@ -272,7 +251,7 @@ func BenchmarkSearchSlot(b *testing.B) { return newDiffLayer(parent, common.Hash{}, accounts, storage) } var layer snapshot - layer = emptyLayer{} + layer = emptyLayer() for i := 0; i < 128; i++ { layer = fill(layer) } @@ -313,7 +292,7 @@ func BenchmarkFlatten(b *testing.B) { for i := 0; i < b.N; i++ { b.StopTimer() var layer snapshot - layer = emptyLayer{} + layer = emptyLayer() for i := 1; i < 128; i++ { layer = fill(layer) } @@ -357,17 +336,14 @@ func BenchmarkJournal(b *testing.B) { } return newDiffLayer(parent, common.Hash{}, accounts, storage) } - var layer snapshot - layer = &diskLayer{ - journal: path.Join(os.TempDir(), "difflayer_journal.tmp"), - } + layer := snapshot(new(diskLayer)) for i := 1; i < 128; i++ { layer = fill(layer) } b.ResetTimer() for i := 0; i < b.N; i++ { - f, _ := layer.(*diffLayer).journal() + f, _, _ := layer.Journal(path.Join(os.TempDir(), "difflayer_journal.tmp")) f.Close() } } diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 474182f1d..b1934d273 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -17,6 +17,7 @@ package snapshot import ( + "bytes" "sync" "github.com/VictoriaMetrics/fastcache" @@ -24,17 +25,21 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) // diskLayer is a low level persistent snapshot built on top of a key-value store. 
type diskLayer struct { - journal string // Path of the snapshot journal to use on shutdown - db ethdb.KeyValueStore // Key-value store containing the base snapshot - cache *fastcache.Cache // Cache to avoid hitting the disk for direct access + diskdb ethdb.KeyValueStore // Key-value store containing the base snapshot + triedb *trie.Database // Trie node cache for reconstruction purposes + cache *fastcache.Cache // Cache to avoid hitting the disk for direct access root common.Hash // Root hash of the base snapshot stale bool // Signals that the layer became stale (state progressed) + genMarker []byte // Marker for the state that's indexed during initial layer generation + genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer + lock sync.RWMutex } @@ -80,18 +85,26 @@ func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) { if dl.stale { return nil, ErrSnapshotStale } + // If the layer is being generated, ensure the requested hash has already been + // covered by the generator. + if dl.genMarker != nil && bytes.Compare(hash[:], dl.genMarker) > 0 { + return nil, ErrNotCoveredYet + } + // If we're in the disk layer, all diff layers missed + snapshotDirtyAccountMissMeter.Mark(1) + // Try to retrieve the account from the memory cache - if blob := dl.cache.Get(nil, hash[:]); blob != nil { - snapshotCleanHitMeter.Mark(1) - snapshotCleanReadMeter.Mark(int64(len(blob))) + if blob, found := dl.cache.HasGet(nil, hash[:]); found { + snapshotCleanAccountHitMeter.Mark(1) + snapshotCleanAccountReadMeter.Mark(int64(len(blob))) return blob, nil } // Cache doesn't contain account, pull from disk and cache for later - blob := rawdb.ReadAccountSnapshot(dl.db, hash) + blob := rawdb.ReadAccountSnapshot(dl.diskdb, hash) dl.cache.Set(hash[:], blob) - snapshotCleanMissMeter.Mark(1) - snapshotCleanWriteMeter.Mark(int64(len(blob))) + snapshotCleanAccountMissMeter.Mark(1) + snapshotCleanAccountWriteMeter.Mark(int64(len(blob))) return blob, nil } @@ -109,18 +122,26 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro } key := append(accountHash[:], storageHash[:]...) + // If the layer is being generated, ensure the requested hash has already been + // covered by the generator.
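While generation is still running, the marker comparison above decides whether a read may be served from the partially built snapshot: account keys are the 32-byte account hash, storage keys are the 64-byte accountHash++storageHash concatenation, and anything lexicographically beyond the marker is not covered yet. A standalone sketch of that check, using placeholder hash values:

package main

import (
	"bytes"
	"fmt"
)

// covered reports whether a snapshot key (32-byte account hash, or 64-byte
// accountHash++storageHash) has already been written by a generator whose
// progress is recorded in marker. A nil marker means generation is finished.
func covered(key, marker []byte) bool {
	return marker == nil || bytes.Compare(key, marker) <= 0
}

func main() {
	// Placeholder values: the generator has progressed up to account hashes
	// starting with 0x80.
	marker := append([]byte{0x80}, make([]byte, 31)...)

	lowAccount := append([]byte{0x10}, make([]byte, 31)...)  // behind the marker
	highAccount := append([]byte{0xf0}, make([]byte, 31)...) // ahead of the marker

	// Storage keys are compared against the same marker, just over 64 bytes.
	lowSlot := append(append([]byte{}, lowAccount...), make([]byte, 32)...)

	fmt.Println(covered(lowAccount, marker))  // true: readable from the snapshot
	fmt.Println(covered(highAccount, marker)) // false: ErrNotCoveredYet territory
	fmt.Println(covered(lowSlot, marker))     // true: the owning account is behind the marker
}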
+ if dl.genMarker != nil && bytes.Compare(key, dl.genMarker) > 0 { + return nil, ErrNotCoveredYet + } + // If we're in the disk layer, all diff layers missed + snapshotDirtyStorageMissMeter.Mark(1) + // Try to retrieve the storage slot from the memory cache - if blob := dl.cache.Get(nil, key); blob != nil { - snapshotCleanHitMeter.Mark(1) - snapshotCleanReadMeter.Mark(int64(len(blob))) + if blob, found := dl.cache.HasGet(nil, key); found { + snapshotCleanStorageHitMeter.Mark(1) + snapshotCleanStorageReadMeter.Mark(int64(len(blob))) return blob, nil } // Cache doesn't contain storage slot, pull from disk and cache for later - blob := rawdb.ReadStorageSnapshot(dl.db, accountHash, storageHash) + blob := rawdb.ReadStorageSnapshot(dl.diskdb, accountHash, storageHash) dl.cache.Set(key, blob) - snapshotCleanMissMeter.Mark(1) - snapshotCleanWriteMeter.Mark(int64(len(blob))) + snapshotCleanStorageMissMeter.Mark(1) + snapshotCleanStorageWriteMeter.Mark(int64(len(blob))) return blob, nil } @@ -131,9 +152,3 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { return newDiffLayer(dl, blockHash, accounts, storage) } - -// Journal commits an entire diff hierarchy to disk into a single journal file. -func (dl *diskLayer) Journal() error { - // There's no journalling a disk layer - return nil -} diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go new file mode 100644 index 000000000..30b690454 --- /dev/null +++ b/core/state/snapshot/disklayer_test.go @@ -0,0 +1,433 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "testing" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +// reverse reverses the contents of a byte slice. It's used to update random accs +// with deterministic changes. +func reverse(blob []byte) []byte { + res := make([]byte, len(blob)) + for i, b := range blob { + res[len(blob)-1-i] = b + } + return res +} + +// Tests that merging something into a disk layer persists it into the database +// and invalidates any previously written and cached values. 
+func TestDiskMerge(t *testing.T) { + // Create some accounts in the disk layer + db := memorydb.New() + + var ( + accNoModNoCache = common.Hash{0x1} + accNoModCache = common.Hash{0x2} + accModNoCache = common.Hash{0x3} + accModCache = common.Hash{0x4} + accDelNoCache = common.Hash{0x5} + accDelCache = common.Hash{0x6} + conNoModNoCache = common.Hash{0x7} + conNoModNoCacheSlot = common.Hash{0x70} + conNoModCache = common.Hash{0x8} + conNoModCacheSlot = common.Hash{0x80} + conModNoCache = common.Hash{0x9} + conModNoCacheSlot = common.Hash{0x90} + conModCache = common.Hash{0xa} + conModCacheSlot = common.Hash{0xa0} + conDelNoCache = common.Hash{0xb} + conDelNoCacheSlot = common.Hash{0xb0} + conDelCache = common.Hash{0xc} + conDelCacheSlot = common.Hash{0xc0} + conNukeNoCache = common.Hash{0xd} + conNukeNoCacheSlot = common.Hash{0xd0} + conNukeCache = common.Hash{0xe} + conNukeCacheSlot = common.Hash{0xe0} + baseRoot = randomHash() + diffRoot = randomHash() + ) + + rawdb.WriteAccountSnapshot(db, accNoModNoCache, accNoModNoCache[:]) + rawdb.WriteAccountSnapshot(db, accNoModCache, accNoModCache[:]) + rawdb.WriteAccountSnapshot(db, accModNoCache, accModNoCache[:]) + rawdb.WriteAccountSnapshot(db, accModCache, accModCache[:]) + rawdb.WriteAccountSnapshot(db, accDelNoCache, accDelNoCache[:]) + rawdb.WriteAccountSnapshot(db, accDelCache, accDelCache[:]) + + rawdb.WriteAccountSnapshot(db, conNoModNoCache, conNoModNoCache[:]) + rawdb.WriteStorageSnapshot(db, conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conNoModCache, conNoModCache[:]) + rawdb.WriteStorageSnapshot(db, conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conModNoCache, conModNoCache[:]) + rawdb.WriteStorageSnapshot(db, conModNoCache, conModNoCacheSlot, conModNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conModCache, conModCache[:]) + rawdb.WriteStorageSnapshot(db, conModCache, conModCacheSlot, conModCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conDelNoCache, conDelNoCache[:]) + rawdb.WriteStorageSnapshot(db, conDelNoCache, conDelNoCacheSlot, conDelNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conDelCache, conDelCache[:]) + rawdb.WriteStorageSnapshot(db, conDelCache, conDelCacheSlot, conDelCacheSlot[:]) + + rawdb.WriteAccountSnapshot(db, conNukeNoCache, conNukeNoCache[:]) + rawdb.WriteStorageSnapshot(db, conNukeNoCache, conNukeNoCacheSlot, conNukeNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conNukeCache, conNukeCache[:]) + rawdb.WriteStorageSnapshot(db, conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:]) + + rawdb.WriteSnapshotRoot(db, baseRoot) + + // Create a disk layer based on the above and cache in some data + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + baseRoot: &diskLayer{ + diskdb: db, + cache: fastcache.New(500 * 1024), + root: baseRoot, + }, + }, + } + base := snaps.Snapshot(baseRoot) + base.AccountRLP(accNoModCache) + base.AccountRLP(accModCache) + base.AccountRLP(accDelCache) + base.Storage(conNoModCache, conNoModCacheSlot) + base.Storage(conModCache, conModCacheSlot) + base.Storage(conDelCache, conDelCacheSlot) + base.Storage(conNukeCache, conNukeCacheSlot) + + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffRoot, baseRoot, map[common.Hash][]byte{ + accModNoCache: reverse(accModNoCache[:]), + accModCache: reverse(accModCache[:]), + accDelNoCache: nil, + accDelCache: nil, + conNukeNoCache: nil, + conNukeCache: nil, + }, map[common.Hash]map[common.Hash][]byte{ + 
conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])}, + conModCache: {conModCacheSlot: reverse(conModCacheSlot[:])}, + conDelNoCache: {conDelNoCacheSlot: nil}, + conDelCache: {conDelCacheSlot: nil}, + }); err != nil { + t.Fatalf("failed to update snapshot tree: %v", err) + } + if err := snaps.Cap(diffRoot, 0); err != nil { + t.Fatalf("failed to flatten snapshot tree: %v", err) + } + // Retrieve all the data through the disk layer and validate it + base = snaps.Snapshot(diffRoot) + if _, ok := base.(*diskLayer); !ok { + t.Fatalf("update not flattend into the disk layer") + } + + // assertAccount ensures that an account matches the given blob. + assertAccount := func(account common.Hash, data []byte) { + t.Helper() + blob, err := base.AccountRLP(account) + if err != nil { + t.Errorf("account access (%x) failed: %v", account, err) + } else if !bytes.Equal(blob, data) { + t.Errorf("account access (%x) mismatch: have %x, want %x", account, blob, data) + } + } + assertAccount(accNoModNoCache, accNoModNoCache[:]) + assertAccount(accNoModCache, accNoModCache[:]) + assertAccount(accModNoCache, reverse(accModNoCache[:])) + assertAccount(accModCache, reverse(accModCache[:])) + assertAccount(accDelNoCache, nil) + assertAccount(accDelCache, nil) + + // assertStorage ensures that a storage slot matches the given blob. + assertStorage := func(account common.Hash, slot common.Hash, data []byte) { + t.Helper() + blob, err := base.Storage(account, slot) + if err != nil { + t.Errorf("storage access (%x:%x) failed: %v", account, slot, err) + } else if !bytes.Equal(blob, data) { + t.Errorf("storage access (%x:%x) mismatch: have %x, want %x", account, slot, blob, data) + } + } + assertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + assertStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:])) + assertStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:])) + assertStorage(conDelNoCache, conDelNoCacheSlot, nil) + assertStorage(conDelCache, conDelCacheSlot, nil) + assertStorage(conNukeNoCache, conNukeNoCacheSlot, nil) + assertStorage(conNukeCache, conNukeCacheSlot, nil) + + // Retrieve all the data directly from the database and validate it + + // assertDatabaseAccount ensures that an account from the database matches the given blob. + assertDatabaseAccount := func(account common.Hash, data []byte) { + t.Helper() + if blob := rawdb.ReadAccountSnapshot(db, account); !bytes.Equal(blob, data) { + t.Errorf("account database access (%x) mismatch: have %x, want %x", account, blob, data) + } + } + assertDatabaseAccount(accNoModNoCache, accNoModNoCache[:]) + assertDatabaseAccount(accNoModCache, accNoModCache[:]) + assertDatabaseAccount(accModNoCache, reverse(accModNoCache[:])) + assertDatabaseAccount(accModCache, reverse(accModCache[:])) + assertDatabaseAccount(accDelNoCache, nil) + assertDatabaseAccount(accDelCache, nil) + + // assertDatabaseStorage ensures that a storage slot from the database matches the given blob. 
+ assertDatabaseStorage := func(account common.Hash, slot common.Hash, data []byte) { + t.Helper() + if blob := rawdb.ReadStorageSnapshot(db, account, slot); !bytes.Equal(blob, data) { + t.Errorf("storage database access (%x:%x) mismatch: have %x, want %x", account, slot, blob, data) + } + } + assertDatabaseStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + assertDatabaseStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + assertDatabaseStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:])) + assertDatabaseStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:])) + assertDatabaseStorage(conDelNoCache, conDelNoCacheSlot, nil) + assertDatabaseStorage(conDelCache, conDelCacheSlot, nil) + assertDatabaseStorage(conNukeNoCache, conNukeNoCacheSlot, nil) + assertDatabaseStorage(conNukeCache, conNukeCacheSlot, nil) +} + +// Tests that merging something into a disk layer persists it into the database +// and invalidates any previously written and cached values, discarding anything +// after the in-progress generation marker. +func TestDiskPartialMerge(t *testing.T) { + // Iterate the test a few times to ensure we pick various internal orderings + // for the data slots as well as the progress marker. + for i := 0; i < 1024; i++ { + // Create some accounts in the disk layer + db := memorydb.New() + + var ( + accNoModNoCache = randomHash() + accNoModCache = randomHash() + accModNoCache = randomHash() + accModCache = randomHash() + accDelNoCache = randomHash() + accDelCache = randomHash() + conNoModNoCache = randomHash() + conNoModNoCacheSlot = randomHash() + conNoModCache = randomHash() + conNoModCacheSlot = randomHash() + conModNoCache = randomHash() + conModNoCacheSlot = randomHash() + conModCache = randomHash() + conModCacheSlot = randomHash() + conDelNoCache = randomHash() + conDelNoCacheSlot = randomHash() + conDelCache = randomHash() + conDelCacheSlot = randomHash() + conNukeNoCache = randomHash() + conNukeNoCacheSlot = randomHash() + conNukeCache = randomHash() + conNukeCacheSlot = randomHash() + baseRoot = randomHash() + diffRoot = randomHash() + genMarker = append(randomHash().Bytes(), randomHash().Bytes()...) + ) + + // insertAccount injects an account into the database if it's after the + // generator marker, drops the op otherwise. This is needed to seed the + // database with a valid starting snapshot. + insertAccount := func(account common.Hash, data []byte) { + if bytes.Compare(account[:], genMarker) <= 0 { + rawdb.WriteAccountSnapshot(db, account, data[:]) + } + } + insertAccount(accNoModNoCache, accNoModNoCache[:]) + insertAccount(accNoModCache, accNoModCache[:]) + insertAccount(accModNoCache, accModNoCache[:]) + insertAccount(accModCache, accModCache[:]) + insertAccount(accDelNoCache, accDelNoCache[:]) + insertAccount(accDelCache, accDelCache[:]) + + // insertStorage injects a storage slot into the database if it's after + // the generator marker, drops the op otherwise. This is needed to seed + // the database with a valid starting snapshot. 
+ insertStorage := func(account common.Hash, slot common.Hash, data []byte) { + if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 { + rawdb.WriteStorageSnapshot(db, account, slot, data[:]) + } + } + insertAccount(conNoModNoCache, conNoModNoCache[:]) + insertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + insertAccount(conNoModCache, conNoModCache[:]) + insertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + insertAccount(conModNoCache, conModNoCache[:]) + insertStorage(conModNoCache, conModNoCacheSlot, conModNoCacheSlot[:]) + insertAccount(conModCache, conModCache[:]) + insertStorage(conModCache, conModCacheSlot, conModCacheSlot[:]) + insertAccount(conDelNoCache, conDelNoCache[:]) + insertStorage(conDelNoCache, conDelNoCacheSlot, conDelNoCacheSlot[:]) + insertAccount(conDelCache, conDelCache[:]) + insertStorage(conDelCache, conDelCacheSlot, conDelCacheSlot[:]) + + insertAccount(conNukeNoCache, conNukeNoCache[:]) + insertStorage(conNukeNoCache, conNukeNoCacheSlot, conNukeNoCacheSlot[:]) + insertAccount(conNukeCache, conNukeCache[:]) + insertStorage(conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:]) + + rawdb.WriteSnapshotRoot(db, baseRoot) + + // Create a disk layer based on the above using a random progress marker + // and cache in some data. + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + baseRoot: &diskLayer{ + diskdb: db, + cache: fastcache.New(500 * 1024), + root: baseRoot, + }, + }, + } + snaps.layers[baseRoot].(*diskLayer).genMarker = genMarker + base := snaps.Snapshot(baseRoot) + + // assertAccount ensures that an account matches the given blob if it's + // already covered by the disk snapshot, and errors out otherwise. + assertAccount := func(account common.Hash, data []byte) { + t.Helper() + blob, err := base.AccountRLP(account) + if bytes.Compare(account[:], genMarker) > 0 && err != ErrNotCoveredYet { + t.Fatalf("test %d: post-marker (%x) account access (%x) succeded: %x", i, genMarker, account, blob) + } + if bytes.Compare(account[:], genMarker) <= 0 && !bytes.Equal(blob, data) { + t.Fatalf("test %d: pre-marker (%x) account access (%x) mismatch: have %x, want %x", i, genMarker, account, blob, data) + } + } + assertAccount(accNoModCache, accNoModCache[:]) + assertAccount(accModCache, accModCache[:]) + assertAccount(accDelCache, accDelCache[:]) + + // assertStorage ensures that a storage slot matches the given blob if + // it's already covered by the disk snapshot, and errors out otherwise. 
+ assertStorage := func(account common.Hash, slot common.Hash, data []byte) { + t.Helper() + blob, err := base.Storage(account, slot) + if bytes.Compare(append(account[:], slot[:]...), genMarker) > 0 && err != ErrNotCoveredYet { + t.Fatalf("test %d: post-marker (%x) storage access (%x:%x) succeded: %x", i, genMarker, account, slot, blob) + } + if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 && !bytes.Equal(blob, data) { + t.Fatalf("test %d: pre-marker (%x) storage access (%x:%x) mismatch: have %x, want %x", i, genMarker, account, slot, blob, data) + } + } + assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + assertStorage(conModCache, conModCacheSlot, conModCacheSlot[:]) + assertStorage(conDelCache, conDelCacheSlot, conDelCacheSlot[:]) + assertStorage(conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:]) + + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffRoot, baseRoot, map[common.Hash][]byte{ + accModNoCache: reverse(accModNoCache[:]), + accModCache: reverse(accModCache[:]), + accDelNoCache: nil, + accDelCache: nil, + conNukeNoCache: nil, + conNukeCache: nil, + }, map[common.Hash]map[common.Hash][]byte{ + conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])}, + conModCache: {conModCacheSlot: reverse(conModCacheSlot[:])}, + conDelNoCache: {conDelNoCacheSlot: nil}, + conDelCache: {conDelCacheSlot: nil}, + }); err != nil { + t.Fatalf("test %d: failed to update snapshot tree: %v", i, err) + } + if err := snaps.Cap(diffRoot, 0); err != nil { + t.Fatalf("test %d: failed to flatten snapshot tree: %v", i, err) + } + // Retrieve all the data through the disk layer and validate it + base = snaps.Snapshot(diffRoot) + if _, ok := base.(*diskLayer); !ok { + t.Fatalf("test %d: update not flattend into the disk layer", i) + } + assertAccount(accNoModNoCache, accNoModNoCache[:]) + assertAccount(accNoModCache, accNoModCache[:]) + assertAccount(accModNoCache, reverse(accModNoCache[:])) + assertAccount(accModCache, reverse(accModCache[:])) + assertAccount(accDelNoCache, nil) + assertAccount(accDelCache, nil) + + assertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + assertStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:])) + assertStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:])) + assertStorage(conDelNoCache, conDelNoCacheSlot, nil) + assertStorage(conDelCache, conDelCacheSlot, nil) + assertStorage(conNukeNoCache, conNukeNoCacheSlot, nil) + assertStorage(conNukeCache, conNukeCacheSlot, nil) + + // Retrieve all the data directly from the database and validate it + + // assertDatabaseAccount ensures that an account inside the database matches + // the given blob if it's already covered by the disk snapshot, and does not + // exist otherwise. 
+ assertDatabaseAccount := func(account common.Hash, data []byte) { + t.Helper() + blob := rawdb.ReadAccountSnapshot(db, account) + if bytes.Compare(account[:], genMarker) > 0 && blob != nil { + t.Fatalf("test %d: post-marker (%x) account database access (%x) succeded: %x", i, genMarker, account, blob) + } + if bytes.Compare(account[:], genMarker) <= 0 && !bytes.Equal(blob, data) { + t.Fatalf("test %d: pre-marker (%x) account database access (%x) mismatch: have %x, want %x", i, genMarker, account, blob, data) + } + } + assertDatabaseAccount(accNoModNoCache, accNoModNoCache[:]) + assertDatabaseAccount(accNoModCache, accNoModCache[:]) + assertDatabaseAccount(accModNoCache, reverse(accModNoCache[:])) + assertDatabaseAccount(accModCache, reverse(accModCache[:])) + assertDatabaseAccount(accDelNoCache, nil) + assertDatabaseAccount(accDelCache, nil) + + // assertDatabaseStorage ensures that a storage slot inside the database + // matches the given blob if it's already covered by the disk snapshot, + // and does not exist otherwise. + assertDatabaseStorage := func(account common.Hash, slot common.Hash, data []byte) { + t.Helper() + blob := rawdb.ReadStorageSnapshot(db, account, slot) + if bytes.Compare(append(account[:], slot[:]...), genMarker) > 0 && blob != nil { + t.Fatalf("test %d: post-marker (%x) storage database access (%x:%x) succeded: %x", i, genMarker, account, slot, blob) + } + if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 && !bytes.Equal(blob, data) { + t.Fatalf("test %d: pre-marker (%x) storage database access (%x:%x) mismatch: have %x, want %x", i, genMarker, account, slot, blob, data) + } + } + assertDatabaseStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + assertDatabaseStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + assertDatabaseStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:])) + assertDatabaseStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:])) + assertDatabaseStorage(conDelNoCache, conDelNoCacheSlot, nil) + assertDatabaseStorage(conDelCache, conDelCacheSlot, nil) + assertDatabaseStorage(conNukeNoCache, conNukeNoCacheSlot, nil) + assertDatabaseStorage(conNukeCache, conNukeCacheSlot, nil) + } +} + +// Tests that merging something into a disk layer persists it into the database +// and invalidates any previously written and cached values, discarding anything +// after the in-progress generation marker. +// +// This test case is a tiny specialized case of TestDiskPartialMerge, which tests +// some very specific cornercases that random tests won't ever trigger. +func TestDiskMidAccountPartialMerge(t *testing.T) { +} diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 445a6ebd9..0f9e5fae5 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -18,12 +18,13 @@ package snapshot import ( "bytes" - "fmt" + "encoding/binary" "math/big" "time" "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" @@ -40,103 +41,122 @@ var ( emptyCode = crypto.Keccak256Hash(nil) ) -// wipeSnapshot iterates over the entire key-value database and deletes all the -// data associated with the snapshot (accounts, storage, metadata). After all is -// done, the snapshot range of the database is compacted to free up unused data -// blocks. 
-func wipeSnapshot(db ethdb.KeyValueStore) error { - // Batch deletions together to avoid holding an iterator for too long - var ( - batch = db.NewBatch() - items int - ) - // Iterate over the snapshot key-range and delete all of them - log.Info("Deleting previous snapshot leftovers") - start, logged := time.Now(), time.Now() - - it := db.NewIteratorWithStart(rawdb.StateSnapshotPrefix) - for it.Next() { - // Skip any keys with the correct prefix but wrong lenth (trie nodes) - key := it.Key() - if !bytes.HasPrefix(key, rawdb.StateSnapshotPrefix) { - break - } - if len(key) != len(rawdb.StateSnapshotPrefix)+common.HashLength && len(key) != len(rawdb.StateSnapshotPrefix)+2*common.HashLength { - continue - } - // Delete the key and periodically recreate the batch and iterator - batch.Delete(key) - items++ - - if items%10000 == 0 { - // Batch too large (or iterator too long lived, flush and recreate) - it.Release() - if err := batch.Write(); err != nil { - return err - } - batch.Reset() - it = db.NewIteratorWithStart(key) - - if time.Since(logged) > 8*time.Second { - log.Info("Deleting previous snapshot leftovers", "wiped", items, "elapsed", time.Since(start)) - logged = time.Now() - } - } - } - it.Release() - - rawdb.DeleteSnapshotRoot(batch) - if err := batch.Write(); err != nil { - return err - } - log.Info("Deleted previous snapshot leftovers", "wiped", items, "elapsed", time.Since(start)) - - // Compact the snapshot section of the database to get rid of unused space - log.Info("Compacting snapshot area in database") - start = time.Now() - - end := common.CopyBytes(rawdb.StateSnapshotPrefix) - end[len(end)-1]++ - - if err := db.Compact(rawdb.StateSnapshotPrefix, end); err != nil { - return err - } - log.Info("Compacted snapshot area in database", "elapsed", time.Since(start)) - - return nil +// generatorStats is a collection of statistics gathered by the snapshot generator +// for logging purposes. +type generatorStats struct { + wiping chan struct{} // Notification channel if wiping is in progress + origin uint64 // Origin prefix where generation started + start time.Time // Timestamp when generation started + accounts uint64 // Number of accounts indexed + slots uint64 // Number of storage slots indexed + storage common.StorageSize // Account and storage slot size } -// generateSnapshot regenerates a brand new snapshot based on an existing state database and head block. -func generateSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) (snapshot, error) { - // Wipe any previously existing snapshot from the database - if err := wipeSnapshot(db); err != nil { - return nil, err - } - // Iterate the entire storage trie and re-generate the state snapshot - var ( - accountCount int - storageCount int - storageNodes int - accountSize common.StorageSize - storageSize common.StorageSize - logged time.Time - ) - batch := db.NewBatch() - triedb := trie.NewDatabase(db) +// Log creates an contextual log with the given message and the context pulled +// from the internally maintained statistics. +func (gs *generatorStats) Log(msg string, marker []byte) { + var ctx []interface{} - accTrie, err := trie.NewSecure(root, triedb) - if err != nil { - return nil, err + // Figure out whether we're after or within an account + switch len(marker) { + case common.HashLength: + ctx = append(ctx, []interface{}{"at", common.BytesToHash(marker)}...) 
+ case 2 * common.HashLength: + ctx = append(ctx, []interface{}{ + "in", common.BytesToHash(marker[:common.HashLength]), + "at", common.BytesToHash(marker[common.HashLength:]), + }...) } + // Add the usual measurements + ctx = append(ctx, []interface{}{ + "accounts", gs.accounts, + "slots", gs.slots, + "storage", gs.storage, + "elapsed", common.PrettyDuration(time.Since(gs.start)), + }...) + // Calculate the estimated indexing time based on current stats + if len(marker) > 0 { + if done := binary.BigEndian.Uint64(marker[:8]) - gs.origin; done > 0 { + left := math.MaxUint64 - binary.BigEndian.Uint64(marker[:8]) + + speed := done/uint64(time.Since(gs.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero + ctx = append(ctx, []interface{}{ + "eta", common.PrettyDuration(time.Duration(left/speed) * time.Millisecond), + }...) + } + } + log.Info(msg, ctx...) +} + +// generateSnapshot regenerates a brand new snapshot based on an existing state +// database and head block asynchronously. The snapshot is returned immediately +// and generation is continued in the background until done. +func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, wiper chan struct{}) *diskLayer { + // Wipe any previously existing snapshot from the database if no wiper is + // currently in progress. + if wiper == nil { + wiper = wipeSnapshot(diskdb, true) + } + // Create a new disk layer with an initialized state marker at zero + rawdb.WriteSnapshotRoot(diskdb, root) + + base := &diskLayer{ + diskdb: diskdb, + triedb: triedb, + root: root, + cache: fastcache.New(cache * 1024 * 1024), + genMarker: []byte{}, // Initialized but empty! + genAbort: make(chan chan *generatorStats), + } + go base.generate(&generatorStats{wiping: wiper, start: time.Now()}) + return base +} + +// generate is a background thread that iterates over the state and storage tries, +// constructing the state snapshot. All the arguments are purely for statistics +// gathering and logging, since the method surfs the blocks as they arrive, often +// being restarted.
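The ETA computation above works because accounts are generated in lexicographic hash order, so the first 8 bytes of the marker act as a position within a 2^64 keyspace. A standalone sketch of the same arithmetic with invented progress figures:

package main

import (
	"encoding/binary"
	"fmt"
	"math"
	"time"
)

// estimateETA reproduces the calculation in generatorStats.Log: progress is the
// big-endian first 8 bytes of the generator marker, origin is where this run
// started, and elapsed is how long it has been running.
func estimateETA(marker []byte, origin uint64, elapsed time.Duration) time.Duration {
	if len(marker) < 8 {
		return 0
	}
	pos := binary.BigEndian.Uint64(marker[:8])
	done := pos - origin
	if done == 0 {
		return 0
	}
	left := math.MaxUint64 - pos
	speed := done/uint64(elapsed/time.Millisecond+1) + 1 // +1s to avoid division by zero
	return time.Duration(left/speed) * time.Millisecond
}

func main() {
	// Invented example: generation started at the beginning of the keyspace and
	// has reached account hashes with prefix 0x20 after two hours.
	marker := []byte{0x20, 0, 0, 0, 0, 0, 0, 0}
	fmt.Println(estimateETA(marker, 0, 2*time.Hour))
}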
+func (dl *diskLayer) generate(stats *generatorStats) { + // If a database wipe is in operation, wait until it's done + if stats.wiping != nil { + stats.Log("Wiper running, state snapshotting paused", dl.genMarker) + select { + // If wiper is done, resume normal mode of operation + case <-stats.wiping: + stats.wiping = nil + stats.start = time.Now() + + // If generator was aboted during wipe, return + case abort := <-dl.genAbort: + abort <- stats + return + } + } + // Create an account and state iterator pointing to the current generator marker + accTrie, err := trie.NewSecure(dl.root, dl.triedb) + if err != nil { + // The account trie is missing (GC), surf the chain until one becomes available + stats.Log("Trie missing, state snapshotting paused", dl.genMarker) + + abort := <-dl.genAbort + abort <- stats + return + } + stats.Log("Resuming state snapshot generation", dl.genMarker) + + var accMarker []byte + if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that + accMarker = dl.genMarker[:common.HashLength] + } + accIt := trie.NewIterator(accTrie.NodeIterator(accMarker)) + batch := dl.diskdb.NewBatch() + + // Iterate from the previous marker and continue generating the state snapshot + logged := time.Now() for accIt.Next() { - var ( - curStorageCount int - curStorageNodes int - curAccountSize common.StorageSize - curStorageSize common.StorageSize - accountHash = common.BytesToHash(accIt.Key) - ) + // Retrieve the current account and flatten it into the internal format + accountHash := common.BytesToHash(accIt.Key) + var acc struct { Nonce uint64 Balance *big.Int @@ -144,63 +164,97 @@ func generateSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) CodeHash []byte } if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil { - return nil, err + log.Crit("Invalid account encountered during snapshot creation", "err", err) } data := AccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) - curAccountSize += common.StorageSize(1 + common.HashLength + len(data)) - rawdb.WriteAccountSnapshot(batch, accountHash, data) - if batch.ValueSize() > ethdb.IdealBatchSize { - batch.Write() - batch.Reset() + // If the account is not yet in-progress, write it out + if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { + rawdb.WriteAccountSnapshot(batch, accountHash, data) + stats.storage += common.StorageSize(1 + common.HashLength + len(data)) + stats.accounts++ } - if acc.Root != emptyRoot { - storeTrie, err := trie.NewSecure(acc.Root, triedb) - if err != nil { - return nil, err - } - storeIt := trie.NewIterator(storeTrie.NodeIterator(nil)) - for storeIt.Next() { - curStorageSize += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) - curStorageCount++ + // If we've exceeded our batch allowance or termination was requested, flush to disk + var abort chan *generatorStats + select { + case abort = <-dl.genAbort: + default: + } + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Only write and set the marker if we actually did something useful + if batch.ValueSize() > 0 { + batch.Write() + batch.Reset() + dl.lock.Lock() + dl.genMarker = accountHash[:] + dl.lock.Unlock() + } + if abort != nil { + stats.Log("Aborting state snapshot generation", accountHash[:]) + abort <- stats + return + } + } + // If the account is in-progress, continue where we left off (otherwise iterate all) + if acc.Root != emptyRoot { + storeTrie, err := trie.NewSecure(acc.Root, dl.triedb) + if err != nil { + log.Crit("Storage trie inaccessible for snapshot 
generation", "err", err) + } + var storeMarker []byte + if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { + storeMarker = dl.genMarker[common.HashLength:] + } + storeIt := trie.NewIterator(storeTrie.NodeIterator(storeMarker)) + for storeIt.Next() { rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(storeIt.Key), storeIt.Value) - if batch.ValueSize() > ethdb.IdealBatchSize { - batch.Write() - batch.Reset() + stats.storage += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) + stats.slots++ + + // If we've exceeded our batch allowance or termination was requested, flush to disk + var abort chan *generatorStats + select { + case abort = <-dl.genAbort: + default: + } + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Only write and set the marker if we actually did something useful + if batch.ValueSize() > 0 { + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = append(accountHash[:], storeIt.Key...) + dl.lock.Unlock() + } + if abort != nil { + stats.Log("Aborting state snapshot generation", append(accountHash[:], storeIt.Key...)) + abort <- stats + return + } } } - curStorageNodes = storeIt.Nodes } - accountCount++ - storageCount += curStorageCount - accountSize += curAccountSize - storageSize += curStorageSize - storageNodes += curStorageNodes - if time.Since(logged) > 8*time.Second { - fmt.Printf("%#x: %9s + %9s (%6d slots, %6d nodes), total %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accIt.Key, curAccountSize.TerminalString(), curStorageSize.TerminalString(), curStorageCount, curStorageNodes, accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) + stats.Log("Generating state snapshot", accIt.Key) logged = time.Now() } + // Some account processed, unmark the marker + accMarker = nil } - fmt.Printf("Totals: %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) - - // Update the snapshot block marker and write any remainder data - rawdb.WriteSnapshotRoot(batch, root) - batch.Write() - batch.Reset() - - // Compact the snapshot section of the database to get rid of unused space - log.Info("Compacting snapshot in chain database") - if err := db.Compact([]byte{'s'}, []byte{'s' + 1}); err != nil { - return nil, err + // Snapshot fully generated, set the marker to nil + if batch.ValueSize() > 0 { + batch.Write() } - // New snapshot generated, construct a brand new base layer - cache := fastcache.New(512 * 1024 * 1024) - return &diskLayer{ - journal: journal, - db: db, - cache: cache, - root: root, - }, nil + log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, + "storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start))) + + dl.lock.Lock() + dl.genMarker = nil + dl.lock.Unlock() + + // Someone will be looking for us, wait it out + abort := <-dl.genAbort + abort <- nil } diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go new file mode 100644 index 000000000..1c6c63a0b --- /dev/null +++ b/core/state/snapshot/journal.go @@ -0,0 +1,257 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bufio" + "encoding/binary" + "errors" + "fmt" + "io" + "os" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// journalGenerator is a disk layer entry containing the generator progress marker. +type journalGenerator struct { + Wiping bool // Whether the database was in progress of being wiped + Done bool // Whether the generator finished creating the snapshot + Marker []byte + Accounts uint64 + Slots uint64 + Storage uint64 +} + +// journalAccount is an account entry in a diffLayer's disk journal. +type journalAccount struct { + Hash common.Hash + Blob []byte +} + +// journalStorage is an account's storage map in a diffLayer's disk journal. +type journalStorage struct { + Hash common.Hash + Keys []common.Hash + Vals [][]byte +} + +// loadSnapshot loads a pre-existing state snapshot backed by a key-value store. +func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, journal string, cache int, root common.Hash) (snapshot, error) { + // Retrieve the block number and hash of the snapshot, failing if no snapshot + // is present in the database (or crashed mid-update). + baseRoot := rawdb.ReadSnapshotRoot(diskdb) + if baseRoot == (common.Hash{}) { + return nil, errors.New("missing or corrupted snapshot") + } + base := &diskLayer{ + diskdb: diskdb, + triedb: triedb, + cache: fastcache.New(cache * 1024 * 1024), + root: baseRoot, + } + // Open the journal, it must exist since even for 0 layer it stores whether + // we've already generated the snapshot or are in progress only + file, err := os.Open(journal) + if err != nil { + return nil, err + } + r := rlp.NewStream(file, 0) + + // Read the snapshot generation progress for the disk layer + var generator journalGenerator + if err := r.Decode(&generator); err != nil { + return nil, fmt.Errorf("failed to load snapshot progress marker: %v", err) + } + // Load all the snapshot diffs from the journal + snapshot, err := loadDiffLayer(base, r) + if err != nil { + return nil, err + } + // Entire snapshot journal loaded, sanity check the head and return + // Journal doesn't exist, don't worry if it's not supposed to + if head := snapshot.Root(); head != root { + return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) + } + // Everything loaded correctly, resume any suspended operations + if !generator.Done { + // If the generator was still wiping, restart one from scratch (fine for + // now as it's rare and the wiper deletes the stuff it touches anyway, so + // restarting won't incur a lot of extra database hops. 
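Per the structs above and the encode/decode order used by Journal and loadDiffLayer, the journal is a flat RLP stream: one journalGenerator entry for the disk layer, then one (root, accounts, storage) triplet per diff layer until EOF. A standalone round-trip sketch over an in-memory buffer; the hashes and blobs are invented, and the types are local copies so the snippet compiles on its own:

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/rlp"
)

// Local copies of the journal entry types from this patch.
type journalGenerator struct {
	Wiping   bool
	Done     bool
	Marker   []byte
	Accounts uint64
	Slots    uint64
	Storage  uint64
}

type journalAccount struct {
	Hash common.Hash
	Blob []byte
}

type journalStorage struct {
	Hash common.Hash
	Keys []common.Hash
	Vals [][]byte
}

func main() {
	buf := new(bytes.Buffer)

	// Disk layer: the generator progress entry comes first.
	rlp.Encode(buf, journalGenerator{Done: true, Accounts: 2})

	// One diff layer: root, then accounts, then storage (invented values).
	rlp.Encode(buf, common.HexToHash("0x01"))
	rlp.Encode(buf, []journalAccount{{Hash: common.HexToHash("0xaa"), Blob: []byte{0x01}}})
	rlp.Encode(buf, []journalStorage{{Hash: common.HexToHash("0xaa"), Keys: []common.Hash{common.HexToHash("0x11")}, Vals: [][]byte{{0x02}}}})

	// Read everything back in the same order loadSnapshot/loadDiffLayer use.
	r := rlp.NewStream(buf, 0)

	var gen journalGenerator
	if err := r.Decode(&gen); err != nil {
		panic(err)
	}
	for {
		var root common.Hash
		if err := r.Decode(&root); err == io.EOF {
			break // end of journal
		} else if err != nil {
			panic(err)
		}
		var accounts []journalAccount
		var storage []journalStorage
		if err := r.Decode(&accounts); err != nil {
			panic(err)
		}
		if err := r.Decode(&storage); err != nil {
			panic(err)
		}
		fmt.Printf("layer %x: %d accounts, %d storage entries\n", root, len(accounts), len(storage))
	}
}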
+ var wiper chan struct{} + if generator.Wiping { + log.Info("Resuming previous snapshot wipe") + wiper = wipeSnapshot(diskdb, false) + } + // Whether or not wiping was in progress, load any generator progress too + base.genMarker = generator.Marker + if base.genMarker == nil { + base.genMarker = []byte{} + } + base.genAbort = make(chan chan *generatorStats) + + var origin uint64 + if len(generator.Marker) >= 8 { + origin = binary.BigEndian.Uint64(generator.Marker) + } + go base.generate(&generatorStats{ + wiping: wiper, + origin: origin, + start: time.Now(), + accounts: generator.Accounts, + slots: generator.Slots, + storage: common.StorageSize(generator.Storage), + }) + } + return snapshot, nil +} + +// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new +// diff and verifying that it can be linked to the requested parent. +func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { + // Read the next diff journal entry + var root common.Hash + if err := r.Decode(&root); err != nil { + // The first read may fail with EOF, marking the end of the journal + if err == io.EOF { + return parent, nil + } + return nil, fmt.Errorf("load diff root: %v", err) + } + var accounts []journalAccount + if err := r.Decode(&accounts); err != nil { + return nil, fmt.Errorf("load diff accounts: %v", err) + } + accountData := make(map[common.Hash][]byte) + for _, entry := range accounts { + accountData[entry.Hash] = entry.Blob + } + var storage []journalStorage + if err := r.Decode(&storage); err != nil { + return nil, fmt.Errorf("load diff storage: %v", err) + } + storageData := make(map[common.Hash]map[common.Hash][]byte) + for _, entry := range storage { + slots := make(map[common.Hash][]byte) + for i, key := range entry.Keys { + slots[key] = entry.Vals[i] + } + storageData[entry.Hash] = slots + } + return loadDiffLayer(newDiffLayer(parent, root, accountData, storageData), r) +} + +// Journal is the internal version of Journal that also returns the journal file +// so subsequent layers know where to write to. +func (dl *diskLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { + // If the snapshot is currenty being generated, abort it + var stats *generatorStats + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + + if stats = <-abort; stats != nil { + stats.Log("Journalling in-progress snapshot", dl.genMarker) + } + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return nil, common.Hash{}, ErrSnapshotStale + } + // We've reached the bottom, open the journal + file, err := os.Create(path) + if err != nil { + return nil, common.Hash{}, err + } + // Write out the generator marker + entry := journalGenerator{ + Done: dl.genMarker == nil, + Marker: dl.genMarker, + } + if stats != nil { + entry.Wiping = (stats.wiping != nil) + entry.Accounts = stats.accounts + entry.Slots = stats.slots + entry.Storage = uint64(stats.storage) + } + if err := rlp.Encode(file, entry); err != nil { + file.Close() + return nil, common.Hash{}, err + } + return file, dl.root, nil +} + +// Journal is the internal version of Journal that also returns the journal file +// so subsequent layers know where to write to. 
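The genAbort channel used here is a small request/response idiom: the requester sends a freshly made reply channel, and the generator goroutine answers on it with its current stats before stopping. A stripped-down standalone sketch of that handshake, with a stand-in stats type instead of *generatorStats:

package main

import (
	"fmt"
	"time"
)

// stats stands in for *generatorStats in this sketch.
type stats struct{ accounts uint64 }

// generate loops doing "work" until an abort request arrives, then replies on
// the provided channel with its progress, mirroring diskLayer.generate.
func generate(genAbort chan chan *stats) {
	progress := &stats{}
	for {
		select {
		case abort := <-genAbort:
			abort <- progress // hand the caller our progress and stop
			return
		default:
			progress.accounts++ // pretend to index an account
			time.Sleep(time.Millisecond)
		}
	}
}

func main() {
	genAbort := make(chan chan *stats)
	go generate(genAbort)

	time.Sleep(50 * time.Millisecond)

	// The requester side, as diskLayer.Journal does it: make a reply channel,
	// send it, and wait for the generator to acknowledge with its stats.
	abort := make(chan *stats)
	genAbort <- abort
	fmt.Println("indexed before abort:", (<-abort).accounts)
}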
+func (dl *diffLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { + // Journal the parent first + writer, base, err := dl.parent.Journal(path) + if err != nil { + return nil, common.Hash{}, err + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + writer.Close() + return nil, common.Hash{}, ErrSnapshotStale + } + // Everything below was journalled, persist this layer too + buf := bufio.NewWriter(writer) + if err := rlp.Encode(buf, dl.root); err != nil { + buf.Flush() + writer.Close() + return nil, common.Hash{}, err + } + accounts := make([]journalAccount, 0, len(dl.accountData)) + for hash, blob := range dl.accountData { + accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(buf, accounts); err != nil { + buf.Flush() + writer.Close() + return nil, common.Hash{}, err + } + storage := make([]journalStorage, 0, len(dl.storageData)) + for hash, slots := range dl.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) + } + if err := rlp.Encode(buf, storage); err != nil { + buf.Flush() + writer.Close() + return nil, common.Hash{}, err + } + buf.Flush() + return writer, base, nil +} diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index d35d69839..744d56c1b 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -18,31 +18,67 @@ package snapshot import ( + "bytes" "errors" "fmt" - "os" + "io" "sync" - "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" - "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) var ( - snapshotCleanHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/hit", nil) - snapshotCleanMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/miss", nil) - snapshotCleanReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/read", nil) - snapshotCleanWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/write", nil) + snapshotCleanAccountHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil) + snapshotCleanAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil) + snapshotCleanAccountReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil) + snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil) + + snapshotCleanStorageHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil) + snapshotCleanStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil) + snapshotCleanStorageReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil) + snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil) + + snapshotDirtyAccountHitMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil) + snapshotDirtyAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil) + snapshotDirtyAccountReadMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil) + 
snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil) + + snapshotDirtyStorageHitMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil) + snapshotDirtyStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil) + snapshotDirtyStorageReadMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil) + snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil) + + snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil) + snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil) + snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil) + snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil) + + snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil) + snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil) + + snapshotBloomAccountTrueHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil) + snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil) + snapshotBloomAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil) + + snapshotBloomStorageTrueHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil) + snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil) + snapshotBloomStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil) // ErrSnapshotStale is returned from data accessors if the underlying snapshot // layer had been invalidated due to the chain progressing forward far enough // to not maintain the layer's original state. ErrSnapshotStale = errors.New("snapshot stale") + // ErrNotCoveredYet is returned from data accessors if the underlying snapshot + // is being generated currently and the requested data item is not yet in the + // range of accounts covered. + ErrNotCoveredYet = errors.New("not covered yet") + // errSnapshotCycle is returned if a snapshot is attempted to be inserted // that forms a cycle in the snapshot tree. errSnapshotCycle = errors.New("snapshot cycle") @@ -79,7 +115,7 @@ type snapshot interface { // Journal commits an entire diff hierarchy to disk into a single journal file. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). - Journal() error + Journal(path string) (io.WriteCloser, common.Hash, error) // Stale return whether this layer has become stale (was flattened across) or // if it's still live. @@ -96,7 +132,10 @@ type snapshot interface { // storage data to avoid expensive multi-level trie lookups; and to allow sorted, // cheap iteration of the account/storage tries for sync aid. 
type Tree struct { - layers map[common.Hash]snapshot // Collection of all known layers // TODO(karalabe): split Clique overlaps + diskdb ethdb.KeyValueStore // Persistent database to store the snapshot + triedb *trie.Database // In-memory cache to access the trie through + cache int // Megabytes permitted to use for read caches + layers map[common.Hash]snapshot // Collection of all known layers lock sync.RWMutex } @@ -105,20 +144,24 @@ type Tree struct { // of the snapshot matches the expected one. // // If the snapshot is missing or inconsistent, the entirety is deleted and will -// be reconstructed from scratch based on the tries in the key-value store. -func New(db ethdb.KeyValueStore, journal string, root common.Hash) (*Tree, error) { - // Attempt to load a previously persisted snapshot - head, err := loadSnapshot(db, journal, root) - if err != nil { - log.Warn("Failed to load snapshot, regenerating", "err", err) - if head, err = generateSnapshot(db, journal, root); err != nil { - return nil, err - } - } - // Existing snapshot loaded or one regenerated, seed all the layers +// be reconstructed from scratch based on the tries in the key-value store, on a +// background thread. +func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, journal string, cache int, root common.Hash) *Tree { + // Create a new, empty snapshot tree snap := &Tree{ + diskdb: diskdb, + triedb: triedb, + cache: cache, layers: make(map[common.Hash]snapshot), } + // Attempt to load a previously persisted snapshot and rebuild one if failed + head, err := loadSnapshot(diskdb, triedb, journal, cache, root) + if err != nil { + log.Warn("Failed to load snapshot, regenerating", "err", err) + snap.Rebuild(root) + return snap + } + // Existing snapshot loaded, seed all the layers for head != nil { snap.layers[head.Root()] = head @@ -131,7 +174,7 @@ func New(db ethdb.KeyValueStore, journal string, root common.Hash) (*Tree, error panic(fmt.Sprintf("unknown data layer: %T", self)) } } - return snap, nil + return snap } // Snapshot retrieves a snapshot belonging to the given block root, or nil if no @@ -173,7 +216,7 @@ func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts ma // Cap traverses downwards the snapshot tree from a head block hash until the // number of allowed layers are crossed. All layers beyond the permitted number // are flattened downwards. -func (t *Tree) Cap(root common.Hash, layers int, memory uint64) error { +func (t *Tree) Cap(root common.Hash, layers int) error { // Retrieve the head snapshot to cap from snap := t.Snapshot(root) if snap == nil { @@ -190,6 +233,8 @@ func (t *Tree) Cap(root common.Hash, layers int, memory uint64) error { // Flattening the bottom-most diff layer requires special casing since there's // no child to rewire to the grandparent. In that case we can fake a temporary // child for the capping and then remove it. 
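To make the dive-then-flatten shape of the capping code easier to follow, here is a rough, self-contained model of the idea with stand-in types; it is not the real diffLayer/diskLayer machinery, only the rule that a child's writes shadow its parent's and that anything deeper than the allowed depth gets merged into one accumulator.

package main

import "fmt"

// layer is a simplified stand-in for a diff layer: newer writes live in the
// child, older ones in the chain of parents (the oldest has parent == nil).
type layer struct {
	parent *layer
	data   map[string]string
}

// capLayers walks down `limit` layers from the head and flattens everything
// deeper into a single accumulator, mirroring the dive/flatten split in cap().
func capLayers(head *layer, limit int) {
	node := head
	for i := 1; i < limit && node.parent != nil; i++ {
		node = node.parent
	}
	for node.parent != nil {
		for k, v := range node.parent.data {
			if _, ok := node.data[k]; !ok { // the child's data shadows the parent's
				node.data[k] = v
			}
		}
		node.parent = node.parent.parent
	}
}

func main() {
	base := &layer{data: map[string]string{"a": "0"}}
	mid := &layer{parent: base, data: map[string]string{"a": "1"}}
	head := &layer{parent: mid, data: map[string]string{"b": "2"}}

	capLayers(head, 2)            // keep head plus one parent, flatten the rest
	fmt.Println(head.parent.data) // map[a:1] -- mid absorbed base, the newer "a" survives
}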
+ var persisted *diskLayer + switch layers { case 0: // If full commit was requested, flatten the diffs and merge onto disk @@ -210,7 +255,7 @@ func (t *Tree) Cap(root common.Hash, layers int, memory uint64) error { ) diff.lock.RLock() bottom = diff.flatten().(*diffLayer) - if bottom.memory >= memory { + if bottom.memory >= aggregatorMemoryLimit { base = diffToDisk(bottom) } diff.lock.RUnlock() @@ -225,7 +270,7 @@ func (t *Tree) Cap(root common.Hash, layers int, memory uint64) error { default: // Many layers requested to be retained, cap normally - t.cap(diff, layers, memory) + persisted = t.cap(diff, layers) } // Remove any layer that is stale or links into a stale layer children := make(map[common.Hash][]common.Hash) @@ -248,13 +293,28 @@ func (t *Tree) Cap(root common.Hash, layers int, memory uint64) error { remove(root) } } + // If the disk layer was modified, regenerate all the cummulative blooms + if persisted != nil { + var rebloom func(root common.Hash) + rebloom = func(root common.Hash) { + if diff, ok := t.layers[root].(*diffLayer); ok { + diff.rebloom(persisted) + } + for _, child := range children[root] { + rebloom(child) + } + } + rebloom(persisted.root) + } return nil } // cap traverses downwards the diff tree until the number of allowed layers are // crossed. All diffs beyond the permitted number are flattened downwards. If the // layer limit is reached, memory cap is also enforced (but not before). -func (t *Tree) cap(diff *diffLayer, layers int, memory uint64) { +// +// The method returns the new disk layer if diffs were persistend into it. +func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer { // Dive until we run out of layers or reach the persistent database for ; layers > 2; layers-- { // If we still have diff layers below, continue down @@ -262,14 +322,14 @@ func (t *Tree) cap(diff *diffLayer, layers int, memory uint64) { diff = parent } else { // Diff stack too shallow, return without modifications - return + return nil } } // We're out of layers, flatten anything below, stopping if it's the disk or if // the memory limit is not yet exceeded. switch parent := diff.parent.(type) { case *diskLayer: - return + return nil case *diffLayer: // Flatten the parent into the grandparent. The flattening internally obtains a @@ -281,8 +341,14 @@ func (t *Tree) cap(diff *diffLayer, layers int, memory uint64) { defer diff.lock.Unlock() diff.parent = flattened - if flattened.memory < memory { - return + if flattened.memory < aggregatorMemoryLimit { + // Accumulator layer is smaller than the limit, so we can abort, unless + // there's a snapshot being generated currently. In that case, the trie + // will move fron underneath the generator so we **must** merge all the + // partial data down into the snapshot and restart the generation. 
+ if flattened.parent.(*diskLayer).genAbort == nil { + return nil + } } default: panic(fmt.Sprintf("unknown data layer: %T", parent)) @@ -296,6 +362,7 @@ func (t *Tree) cap(diff *diffLayer, layers int, memory uint64) { t.layers[base.root] = base diff.parent = base + return base } // diffToDisk merges a bottom-most diff into the persistent disk layer underneath @@ -303,8 +370,15 @@ func (t *Tree) cap(diff *diffLayer, layers int, memory uint64) { func diffToDisk(bottom *diffLayer) *diskLayer { var ( base = bottom.parent.(*diskLayer) - batch = base.db.NewBatch() + batch = base.diskdb.NewBatch() + stats *generatorStats ) + // If the disk layer is running a snapshot generator, abort it + if base.genAbort != nil { + abort := make(chan *generatorStats) + base.genAbort <- abort + stats = <-abort + } // Start by temporarily deleting the current snapshot block marker. This // ensures that in the case of a crash, the entire snapshot is invalidated. rawdb.DeleteSnapshotRoot(batch) @@ -319,6 +393,10 @@ func diffToDisk(bottom *diffLayer) *diskLayer { // Push all the accounts into the database for hash, data := range bottom.accountData { + // Skip any account not covered yet by the snapshot + if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 { + continue + } if len(data) > 0 { // Account was updated, push to disk rawdb.WriteAccountSnapshot(batch, hash, data) @@ -335,19 +413,35 @@ func diffToDisk(bottom *diffLayer) *diskLayer { rawdb.DeleteAccountSnapshot(batch, hash) base.cache.Set(hash[:], nil) - it := rawdb.IterateStorageSnapshots(base.db, hash) + it := rawdb.IterateStorageSnapshots(base.diskdb, hash) for it.Next() { if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator batch.Delete(key) base.cache.Del(key[1:]) + + snapshotFlushStorageItemMeter.Mark(1) + snapshotFlushStorageSizeMeter.Mark(int64(len(data))) } } it.Release() } + snapshotFlushAccountItemMeter.Mark(1) + snapshotFlushAccountSizeMeter.Mark(int64(len(data))) } // Push all the storage slots into the database for accountHash, storage := range bottom.storageData { + // Skip any account not covered yet by the snapshot + if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 { + continue + } + // Generation might be mid-account, track that case too + midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength]) + for storageHash, data := range storage { + // Skip any slot not covered yet by the snapshot + if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 { + continue + } if len(data) > 0 { rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) base.cache.Set(append(accountHash[:], storageHash[:]...), data) @@ -355,6 +449,8 @@ func diffToDisk(bottom *diffLayer) *diskLayer { rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) base.cache.Set(append(accountHash[:], storageHash[:]...), nil) } + snapshotFlushStorageItemMeter.Mark(1) + snapshotFlushStorageSizeMeter.Mark(int64(len(data))) } if batch.ValueSize() > ethdb.IdealBatchSize { if err := batch.Write(); err != nil { @@ -368,65 +464,91 @@ func diffToDisk(bottom *diffLayer) *diskLayer { if err := batch.Write(); err != nil { log.Crit("Failed to write leftover snapshot", "err", err) } - return &diskLayer{ - root: bottom.root, - cache: base.cache, - db: base.db, - journal: base.journal, + res := &diskLayer{ + root: bottom.root, + cache: base.cache, + diskdb: base.diskdb, + triedb: base.triedb, + genMarker: 
base.genMarker, } + // If snapshot generation hasn't finished yet, port over all the starts and + // continue where the previous round left off. + // + // Note, the `base.genAbort` comparison is not used normally, it's checked + // to allow the tests to play with the marker without triggering this path. + if base.genMarker != nil && base.genAbort != nil { + res.genMarker = base.genMarker + res.genAbort = make(chan chan *generatorStats) + go res.generate(stats) + } + return res } // Journal commits an entire diff hierarchy to disk into a single journal file. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). -func (t *Tree) Journal(blockRoot common.Hash) error { +// +// The method returns the root hash of the base layer that needs to be persisted +// to disk as a trie too to allow continuing any pending generation op. +func (t *Tree) Journal(root common.Hash, path string) (common.Hash, error) { // Retrieve the head snapshot to journal from var snap snapshot - snap := t.Snapshot(blockRoot) + snap := t.Snapshot(root) if snap == nil { - return fmt.Errorf("snapshot [%#x] missing", blockRoot) + return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root) } // Run the journaling t.lock.Lock() defer t.lock.Unlock() - return snap.(snapshot).Journal() + writer, base, err := snap.(snapshot).Journal(path) + if err != nil { + return common.Hash{}, err + } + return base, writer.Close() } -// loadSnapshot loads a pre-existing state snapshot backed by a key-value store. -func loadSnapshot(db ethdb.KeyValueStore, journal string, root common.Hash) (snapshot, error) { - // Retrieve the block number and hash of the snapshot, failing if no snapshot - // is present in the database (or crashed mid-update). - baseRoot := rawdb.ReadSnapshotRoot(db) - if baseRoot == (common.Hash{}) { - return nil, errors.New("missing or corrupted snapshot") - } - base := &diskLayer{ - journal: journal, - db: db, - cache: fastcache.New(512 * 1024 * 1024), - root: baseRoot, - } - // Load all the snapshot diffs from the journal, failing if their chain is broken - // or does not lead from the disk snapshot to the specified head. - if _, err := os.Stat(journal); os.IsNotExist(err) { - // Journal doesn't exist, don't worry if it's not supposed to - if baseRoot != root { - return nil, fmt.Errorf("snapshot journal missing, head doesn't match snapshot: have %#x, want %#x", baseRoot, root) +// Rebuild wipes all available snapshot data from the persistent database and +// discard all caches and diff layers. Afterwards, it starts a new snapshot +// generator with the given root hash. 
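The generation-marker checks in diffToDisk above are the subtle part: anything past the marker has not been generated yet and must not be flushed into the snapshot. A minimal sketch of that coverage test, with a hypothetical covered helper and hashLen standing in for common.HashLength (not code from the patch itself):

package main

import (
	"bytes"
	"fmt"
)

const hashLen = 32 // stand-in for common.HashLength

// covered reports whether an (account, optional slot) key is already inside the
// range the background generator has written out, given its progress marker.
// A nil marker means generation finished (everything covered); an empty marker
// means it has only just started (nothing covered yet).
func covered(marker, account, slot []byte) bool {
	if marker == nil {
		return true
	}
	if bytes.Compare(account, marker) > 0 {
		return false // account is past the marker, not generated yet
	}
	if slot != nil && len(marker) >= hashLen && bytes.Equal(account, marker[:hashLen]) {
		// Generation stopped mid-way through this very account: only slots up
		// to the storage part of the marker are covered.
		return bytes.Compare(slot, marker[hashLen:]) <= 0
	}
	return true
}

func main() {
	acct := bytes.Repeat([]byte{0x20}, hashLen)
	slotLow := bytes.Repeat([]byte{0x01}, hashLen)
	slotHigh := bytes.Repeat([]byte{0xff}, hashLen)
	marker := append(append([]byte{}, acct...), bytes.Repeat([]byte{0x80}, hashLen)...)

	fmt.Println(covered(nil, acct, nil))         // true: generation done
	fmt.Println(covered([]byte{}, acct, nil))    // false: generation just started
	fmt.Println(covered(marker, acct, slotLow))  // true: slot below the marker
	fmt.Println(covered(marker, acct, slotHigh)) // false: slot above the marker
}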
+func (t *Tree) Rebuild(root common.Hash) { + t.lock.Lock() + defer t.lock.Unlock() + + // Track whether there's a wipe currently running and keep it alive if so + var wiper chan struct{} + + // Iterate over and mark all layers stale + for _, layer := range t.layers { + switch layer := layer.(type) { + case *diskLayer: + // If the base layer is generating, abort it and save + if layer.genAbort != nil { + abort := make(chan *generatorStats) + layer.genAbort <- abort + + if stats := <-abort; stats != nil { + wiper = stats.wiping + } + } + // Layer should be inactive now, mark it as stale + layer.lock.Lock() + layer.stale = true + layer.lock.Unlock() + + case *diffLayer: + // If the layer is a simple diff, simply mark as stale + layer.lock.Lock() + layer.stale = true + layer.lock.Unlock() + + default: + panic(fmt.Sprintf("unknown layer type: %T", layer)) } - return base, nil } - file, err := os.Open(journal) - if err != nil { - return nil, err + // Start generating a new snapshot from scratch on a backgroung thread. The + // generator will run a wiper first if there's not one running right now. + log.Info("Rebuilding state snapshot") + t.layers = map[common.Hash]snapshot{ + root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper), } - snapshot, err := loadDiffLayer(base, rlp.NewStream(file, 0)) - if err != nil { - return nil, err - } - // Entire snapshot journal loaded, sanity check the head and return - // Journal doesn't exist, don't worry if it's not supposed to - if head := snapshot.Root(); head != root { - return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) - } - return snapshot, nil } diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 9c872a895..44b8f3cef 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -31,9 +31,9 @@ import ( func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ - db: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -54,7 +54,7 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 2) } // Commit the diff layer onto the disk and ensure it's persisted - if err := snaps.Cap(common.HexToHash("0x02"), 0, 0); err != nil { + if err := snaps.Cap(common.HexToHash("0x02"), 0); err != nil { t.Fatalf("failed to merge diff layer onto disk: %v", err) } // Since the base layer was modified, ensure that data retrievald on the external reference fail @@ -76,9 +76,9 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ - db: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -102,7 +102,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3) } // Commit the diff layer onto the disk and ensure it's persisted - if err := 
snaps.Cap(common.HexToHash("0x03"), 2, 0); err != nil { + defer func(memcap uint64) { aggregatorMemoryLimit = memcap }(aggregatorMemoryLimit) + aggregatorMemoryLimit = 0 + + if err := snaps.Cap(common.HexToHash("0x03"), 2); err != nil { t.Fatalf("failed to merge diff layer onto disk: %v", err) } // Since the base layer was modified, ensure that data retrievald on the external reference fail @@ -124,9 +127,9 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ - db: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -150,7 +153,7 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { ref := snaps.Snapshot(common.HexToHash("0x02")) // Flatten the diff layer into the bottom accumulator - if err := snaps.Cap(common.HexToHash("0x03"), 1, 1024*1024); err != nil { + if err := snaps.Cap(common.HexToHash("0x03"), 1); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) } // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail @@ -172,9 +175,9 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ - db: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -202,14 +205,14 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { // Doing a Cap operation with many allowed layers should be a no-op exp := len(snaps.layers) - if err := snaps.Cap(common.HexToHash("0x04"), 2000, 1024*1024); err != nil { + if err := snaps.Cap(common.HexToHash("0x04"), 2000); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) } if got := len(snaps.layers); got != exp { t.Errorf("layers modified, got %d exp %d", got, exp) } // Flatten the diff layer into the bottom accumulator - if err := snaps.Cap(common.HexToHash("0x04"), 2, 1024*1024); err != nil { + if err := snaps.Cap(common.HexToHash("0x04"), 2); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) } // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail @@ -236,9 +239,9 @@ func TestPostCapBasicDataAccess(t *testing.T) { } // Create a starting base layer and a snapshot tree out of it base := &diskLayer{ - db: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } snaps := &Tree{ layers: map[common.Hash]snapshot{ @@ -280,11 +283,11 @@ func TestPostCapBasicDataAccess(t *testing.T) { t.Error(err) } // Cap to a bad root should fail - if err := snaps.Cap(common.HexToHash("0x1337"), 0, 1024); err == nil { + if err := snaps.Cap(common.HexToHash("0x1337"), 0); err == nil { t.Errorf("expected error, got none") } // Now, merge the a-chain - snaps.Cap(common.HexToHash("0xa3"), 0, 1024) + 
snaps.Cap(common.HexToHash("0xa3"), 0) // At this point, a2 got merged into a1. Thus, a1 is now modified, and as a1 is // the parent of b2, b2 should no longer be able to iterate into parent. @@ -308,7 +311,7 @@ func TestPostCapBasicDataAccess(t *testing.T) { } // Now, merge it again, just for fun. It should now error, since a3 // is a disk layer - if err := snaps.Cap(common.HexToHash("0xa3"), 0, 1024); err == nil { + if err := snaps.Cap(common.HexToHash("0xa3"), 0); err == nil { t.Error("expected error capping the disk layer, got none") } } diff --git a/core/state/snapshot/wipe.go b/core/state/snapshot/wipe.go new file mode 100644 index 000000000..052af6f1f --- /dev/null +++ b/core/state/snapshot/wipe.go @@ -0,0 +1,130 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// wipeSnapshot starts a goroutine to iterate over the entire key-value database +// and delete all the data associated with the snapshot (accounts, storage, +// metadata). After all is done, the snapshot range of the database is compacted +// to free up unused data blocks. +func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} { + // Wipe the snapshot root marker synchronously + if full { + rawdb.DeleteSnapshotRoot(db) + } + // Wipe everything else asynchronously + wiper := make(chan struct{}, 1) + go func() { + if err := wipeContent(db); err != nil { + log.Error("Failed to wipe state snapshot", "err", err) // Database close will trigger this + return + } + close(wiper) + }() + return wiper +} + +// wipeContent iterates over the entire key-value database and deletes all the +// data associated with the snapshot (accounts, storage), but not the root hash +// as the wiper is meant to run on a background thread but the root needs to be +// removed in sync to avoid data races. After all is done, the snapshot range of +// the database is compacted to free up unused data blocks. 
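The wipe code that follows relies on the snapshot key schema: an account entry is the account prefix plus a 32-byte hash, a storage entry is the storage prefix plus two hashes, and anything with the right prefix but the wrong length belongs to somebody else. A small sketch of that layout and of the length filter, using made-up one-byte prefixes in place of the real rawdb.SnapshotAccountPrefix/SnapshotStoragePrefix values:

package main

import (
	"bytes"
	"fmt"
)

const hashLen = 32 // stand-in for common.HashLength

// Illustrative stand-ins; the concrete prefix bytes live in core/rawdb/schema.go.
var (
	accountPrefix = []byte("a")
	storagePrefix = []byte("o")
)

// accountKey is prefix + account hash (len(prefix)+32 bytes in total).
func accountKey(account []byte) []byte {
	return append(append([]byte{}, accountPrefix...), account...)
}

// storageKey is prefix + account hash + slot hash (len(prefix)+64 bytes in total).
func storageKey(account, slot []byte) []byte {
	return append(append(append([]byte{}, storagePrefix...), account...), slot...)
}

// wipeable mirrors the filter in wipeKeyRange: right prefix and exact length,
// so that e.g. a raw 32-byte trie node hash starting with the same byte is spared.
func wipeable(key, prefix []byte, keylen int) bool {
	return bytes.HasPrefix(key, prefix) && len(key) == keylen
}

func main() {
	acct := bytes.Repeat([]byte{0xaa}, hashLen)
	slot := bytes.Repeat([]byte{0xbb}, hashLen)

	ak := accountKey(acct)
	sk := storageKey(acct, slot)
	trieNode := append(append([]byte{}, accountPrefix...), bytes.Repeat([]byte{0xcc}, 31)...)

	fmt.Println(wipeable(ak, accountPrefix, len(accountPrefix)+hashLen))       // true
	fmt.Println(wipeable(sk, storagePrefix, len(storagePrefix)+2*hashLen))     // true
	fmt.Println(wipeable(trieNode, accountPrefix, len(accountPrefix)+hashLen)) // false: wrong length
}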
+func wipeContent(db ethdb.KeyValueStore) error { + if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil { + return err + } + if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil { + return err + } + // Compact the snapshot section of the database to get rid of unused space + start := time.Now() + + log.Info("Compacting snapshot account area ") + end := common.CopyBytes(rawdb.SnapshotAccountPrefix) + end[len(end)-1]++ + + if err := db.Compact(rawdb.SnapshotAccountPrefix, end); err != nil { + return err + } + log.Info("Compacting snapshot storage area ") + end = common.CopyBytes(rawdb.SnapshotStoragePrefix) + end[len(end)-1]++ + + if err := db.Compact(rawdb.SnapshotStoragePrefix, end); err != nil { + return err + } + log.Info("Compacted snapshot area in database", "elapsed", common.PrettyDuration(time.Since(start))) + + return nil +} + +// wipeKeyRange deletes a range of keys from the database starting with prefix +// and having a specific total key length. +func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int) error { + // Batch deletions together to avoid holding an iterator for too long + var ( + batch = db.NewBatch() + items int + ) + // Iterate over the key-range and delete all of them + start, logged := time.Now(), time.Now() + + it := db.NewIteratorWithStart(prefix) + for it.Next() { + // Skip any keys with the correct prefix but wrong lenth (trie nodes) + key := it.Key() + if !bytes.HasPrefix(key, prefix) { + break + } + if len(key) != keylen { + continue + } + // Delete the key and periodically recreate the batch and iterator + batch.Delete(key) + items++ + + if items%10000 == 0 { + // Batch too large (or iterator too long lived, flush and recreate) + it.Release() + if err := batch.Write(); err != nil { + return err + } + batch.Reset() + it = db.NewIteratorWithStart(key) + + if time.Since(logged) > 8*time.Second { + log.Info("Deleting state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start))) + logged = time.Now() + } + } + } + it.Release() + if err := batch.Write(); err != nil { + return err + } + log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start))) + return nil +} diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/wipe_test.go similarity index 77% rename from core/state/snapshot/generate_test.go rename to core/state/snapshot/wipe_test.go index 180db920a..f12769a95 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/wipe_test.go @@ -59,17 +59,31 @@ func TestWipe(t *testing.T) { // Randomize the suffix, dedup and inject it under the snapshot namespace keysuffix := make([]byte, keysize) rand.Read(keysuffix) - db.Put(append(rawdb.StateSnapshotPrefix, keysuffix...), randomHash().Bytes()) + + if rand.Int31n(2) == 0 { + db.Put(append(rawdb.SnapshotAccountPrefix, keysuffix...), randomHash().Bytes()) + } else { + db.Put(append(rawdb.SnapshotStoragePrefix, keysuffix...), randomHash().Bytes()) + } } // Sanity check that all the keys are present var items int - it := db.NewIteratorWithPrefix(rawdb.StateSnapshotPrefix) + it := db.NewIteratorWithPrefix(rawdb.SnapshotAccountPrefix) defer it.Release() for it.Next() { key := it.Key() - if len(key) == len(rawdb.StateSnapshotPrefix)+32 || len(key) == len(rawdb.StateSnapshotPrefix)+64 { + if 
len(key) == len(rawdb.SnapshotAccountPrefix)+common.HashLength { + items++ + } + } + it = db.NewIteratorWithPrefix(rawdb.SnapshotStoragePrefix) + defer it.Release() + + for it.Next() { + key := it.Key() + if len(key) == len(rawdb.SnapshotStoragePrefix)+2*common.HashLength { items++ } } @@ -80,16 +94,24 @@ func TestWipe(t *testing.T) { t.Errorf("snapshot block marker mismatch: have %#x, want ", hash) } // Wipe all snapshot entries from the database - if err := wipeSnapshot(db); err != nil { - t.Fatalf("failed to wipe snapshot: %v", err) - } + <-wipeSnapshot(db, true) + // Iterate over the database end ensure no snapshot information remains - it = db.NewIteratorWithPrefix(rawdb.StateSnapshotPrefix) + it = db.NewIteratorWithPrefix(rawdb.SnapshotAccountPrefix) defer it.Release() for it.Next() { key := it.Key() - if len(key) == len(rawdb.StateSnapshotPrefix)+32 || len(key) == len(rawdb.StateSnapshotPrefix)+64 { + if len(key) == len(rawdb.SnapshotAccountPrefix)+common.HashLength { + t.Errorf("snapshot entry remained after wipe: %x", key) + } + } + it = db.NewIteratorWithPrefix(rawdb.SnapshotStoragePrefix) + defer it.Release() + + for it.Next() { + key := it.Key() + if len(key) == len(rawdb.SnapshotStoragePrefix)+2*common.HashLength { t.Errorf("snapshot entry remained after wipe: %x", key) } } diff --git a/core/state/statedb.go b/core/state/statedb.go index f11bd2adb..1528b45aa 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -845,8 +845,8 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := s.snaps.Update(root, parent, s.snapAccounts, s.snapStorage); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } - if err := s.snaps.Cap(root, 16, 4*1024*1024); err != nil { - log.Warn("Failed to cap snapshot tree", "root", root, "layers", 16, "memory", 4*1024*1024, "err", err) + if err := s.snaps.Cap(root, 128); err != nil { + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 128, "err", err) } } s.snap, s.snapAccounts, s.snapStorage = nil, nil, nil diff --git a/eth/backend.go b/eth/backend.go index bda307d95..ed79340f5 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -127,7 +127,8 @@ func New(ctx *node.ServiceContext, config *Config) (*Ethereum, error) { config.Miner.GasPrice = new(big.Int).Set(DefaultConfig.Miner.GasPrice) } if config.NoPruning && config.TrieDirtyCache > 0 { - config.TrieCleanCache += config.TrieDirtyCache + config.TrieCleanCache += config.TrieDirtyCache * 3 / 5 + config.SnapshotCache += config.TrieDirtyCache * 3 / 5 config.TrieDirtyCache = 0 } log.Info("Allocated trie memory caches", "clean", common.StorageSize(config.TrieCleanCache)*1024*1024, "dirty", common.StorageSize(config.TrieDirtyCache)*1024*1024) @@ -184,6 +185,7 @@ func New(ctx *node.ServiceContext, config *Config) (*Ethereum, error) { TrieDirtyLimit: config.TrieDirtyCache, TrieDirtyDisabled: config.NoPruning, TrieTimeLimit: config.TrieTimeout, + SnapshotLimit: config.SnapshotCache, } ) eth.blockchain, err = core.NewBlockChain(chainDb, cacheConfig, chainConfig, eth.engine, vmConfig, eth.shouldPreserve) @@ -204,7 +206,7 @@ func New(ctx *node.ServiceContext, config *Config) (*Ethereum, error) { eth.txPool = core.NewTxPool(config.TxPool, chainConfig, eth.blockchain) // Permit the downloader to use the trie cache allowance during fast sync - cacheLimit := cacheConfig.TrieCleanLimit + cacheConfig.TrieDirtyLimit + cacheLimit := cacheConfig.TrieCleanLimit + cacheConfig.TrieDirtyLimit + cacheConfig.SnapshotLimit checkpoint := 
config.Checkpoint if checkpoint == nil { checkpoint = params.TrustedCheckpoints[genesisHash] diff --git a/eth/config.go b/eth/config.go index 2eaf21fbc..160ce8aa5 100644 --- a/eth/config.go +++ b/eth/config.go @@ -50,6 +50,7 @@ var DefaultConfig = Config{ TrieCleanCache: 256, TrieDirtyCache: 256, TrieTimeout: 60 * time.Minute, + SnapshotCache: 256, Miner: miner.Config{ GasFloor: 8000000, GasCeil: 8000000, @@ -125,6 +126,7 @@ type Config struct { TrieCleanCache int TrieDirtyCache int TrieTimeout time.Duration + SnapshotCache int // Mining options Miner miner.Config diff --git a/trie/iterator.go b/trie/iterator.go index 88189c542..bb4025d8f 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -29,7 +29,6 @@ import ( type Iterator struct { nodeIt NodeIterator - Nodes int // Number of nodes iterated over Key []byte // Current data key on which the iterator is positioned on Value []byte // Current data value on which the iterator is positioned on Err error @@ -47,7 +46,6 @@ func NewIterator(it NodeIterator) *Iterator { // Next moves the iterator forward one key-value entry. func (it *Iterator) Next() bool { for it.nodeIt.Next(true) { - it.Nodes++ if it.nodeIt.Leaf() { it.Key = it.nodeIt.LeafKey() it.Value = it.nodeIt.LeafBlob() From d5d7c0c24b824b0a166c606b4e71a92bd5e16e21 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Sun, 1 Dec 2019 20:49:00 +0100 Subject: [PATCH 08/28] core/state/snapshot: fix difflayer origin-initalization after flatten --- core/state/snapshot/difflayer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 0743e4759..cf8c47c3e 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -396,6 +396,7 @@ func (dl *diffLayer) flatten() snapshot { // Return the combo parent return &diffLayer{ parent: parent.parent, + origin: parent.origin, root: dl.root, storageList: parent.storageList, storageData: parent.storageData, From fd39f722a3ff506aba9a993bb10ef176f8d654d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Mon, 2 Dec 2019 13:27:20 +0200 Subject: [PATCH 09/28] core: journal the snapshot inside leveldb, not a flat file --- core/blockchain.go | 4 +- core/rawdb/accessors_snapshot.go | 23 +++++++++ core/rawdb/schema.go | 5 +- core/state/snapshot/difflayer_test.go | 5 +- core/state/snapshot/journal.go | 70 +++++++++++---------------- core/state/snapshot/snapshot.go | 22 +++++---- core/state/statedb.go | 4 +- 7 files changed, 72 insertions(+), 61 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 3932baf55..f868f7301 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -302,7 +302,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } } // Load any existing snapshot, regenerating it if loading failed - bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), "snapshot.rlp", bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root()) + bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root()) // Take ownership of this particular state go bc.update() @@ -854,7 +854,7 @@ func (bc *BlockChain) Stop() { bc.wg.Wait() // Ensure that the entirety of the state snapshot is journalled to disk. 
- snapBase, err := bc.snaps.Journal(bc.CurrentBlock().Root(), "snapshot.rlp") + snapBase, err := bc.snaps.Journal(bc.CurrentBlock().Root()) if err != nil { log.Error("Failed to journal state snapshot", "err", err) } diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index 9388e857b..3a8d6c779 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -95,3 +95,26 @@ func DeleteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash com func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash) ethdb.Iterator { return db.NewIteratorWithPrefix(storageSnapshotsKey(accountHash)) } + +// ReadSnapshotJournal retrieves the serialized in-memory diff layers saved at +// the last shutdown. The blob is expected to be max a few 10s of megabytes. +func ReadSnapshotJournal(db ethdb.KeyValueReader) []byte { + data, _ := db.Get(snapshotJournalKey) + return data +} + +// WriteSnapshotJournal stores the serialized in-memory diff layers to save at +// shutdown. The blob is expected to be max a few 10s of megabytes. +func WriteSnapshotJournal(db ethdb.KeyValueWriter, journal []byte) { + if err := db.Put(snapshotJournalKey, journal); err != nil { + log.Crit("Failed to store snapshot journal", "err", err) + } +} + +// DeleteSnapshotJournal deletes the serialized in-memory diff layers saved at +// the last shutdown +func DeleteSnapshotJournal(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotJournalKey); err != nil { + log.Crit("Failed to remove snapshot journal", "err", err) + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 1b8e53eb6..dc8faca32 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -41,9 +41,12 @@ var ( // fastTrieProgressKey tracks the number of trie entries imported during fast sync. fastTrieProgressKey = []byte("TrieSync") - // snapshotRootKey tracks the number and hash of the last snapshot. + // snapshotRootKey tracks the hash of the last snapshot. snapshotRootKey = []byte("SnapshotRoot") + // snapshotJournalKey tracks the in-memory diff layers across restarts. + snapshotJournalKey = []byte("SnapshotJournal") + // Data item prefixes (use single byte to avoid mixing data types, avoid `i`, used for indexes). headerPrefix = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header headerTDSuffix = []byte("t") // headerPrefix + num (uint64 big endian) + hash + headerTDSuffix -> td diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 9029bb04b..84220e359 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -20,8 +20,6 @@ import ( "bytes" "math/big" "math/rand" - "os" - "path" "testing" "github.com/VictoriaMetrics/fastcache" @@ -343,7 +341,6 @@ func BenchmarkJournal(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - f, _, _ := layer.Journal(path.Join(os.TempDir(), "difflayer_journal.tmp")) - f.Close() + layer.Journal(new(bytes.Buffer)) } } diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 1c6c63a0b..1c36e0623 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -17,12 +17,11 @@ package snapshot import ( - "bufio" + "bytes" "encoding/binary" "errors" "fmt" "io" - "os" "time" "github.com/VictoriaMetrics/fastcache" @@ -58,7 +57,7 @@ type journalStorage struct { } // loadSnapshot loads a pre-existing state snapshot backed by a key-value store. 
-func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, journal string, cache int, root common.Hash) (snapshot, error) { +func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) (snapshot, error) { // Retrieve the block number and hash of the snapshot, failing if no snapshot // is present in the database (or crashed mid-update). baseRoot := rawdb.ReadSnapshotRoot(diskdb) @@ -71,13 +70,13 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, journal str cache: fastcache.New(cache * 1024 * 1024), root: baseRoot, } - // Open the journal, it must exist since even for 0 layer it stores whether + // Retrieve the journal, it must exist since even for 0 layer it stores whether // we've already generated the snapshot or are in progress only - file, err := os.Open(journal) - if err != nil { - return nil, err + journal := rawdb.ReadSnapshotJournal(diskdb) + if len(journal) == 0 { + return nil, errors.New("missing or corrupted snapshot journal") } - r := rlp.NewStream(file, 0) + r := rlp.NewStream(bytes.NewReader(journal), 0) // Read the snapshot generation progress for the disk layer var generator journalGenerator @@ -162,9 +161,9 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { return loadDiffLayer(newDiffLayer(parent, root, accountData, storageData), r) } -// Journal is the internal version of Journal that also returns the journal file -// so subsequent layers know where to write to. -func (dl *diskLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { +// Journal writes the persistent layer generator stats into a buffer to be stored +// in the database as the snapshot journal. +func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { // If the snapshot is currenty being generated, abort it var stats *generatorStats if dl.genAbort != nil { @@ -180,12 +179,7 @@ func (dl *diskLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { defer dl.lock.RUnlock() if dl.stale { - return nil, common.Hash{}, ErrSnapshotStale - } - // We've reached the bottom, open the journal - file, err := os.Create(path) - if err != nil { - return nil, common.Hash{}, err + return common.Hash{}, ErrSnapshotStale } // Write out the generator marker entry := journalGenerator{ @@ -198,44 +192,37 @@ func (dl *diskLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { entry.Slots = stats.slots entry.Storage = uint64(stats.storage) } - if err := rlp.Encode(file, entry); err != nil { - file.Close() - return nil, common.Hash{}, err + if err := rlp.Encode(buffer, entry); err != nil { + return common.Hash{}, err } - return file, dl.root, nil + return dl.root, nil } -// Journal is the internal version of Journal that also returns the journal file -// so subsequent layers know where to write to. -func (dl *diffLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { +// Journal writes the memory layer contents into a buffer to be stored in the +// database as the snapshot journal. 
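For reference, the journal written into the buffer is plain RLP: one generator record for the disk layer, then for every diff layer (bottom-most first) its root, its account list and its storage list, read back until EOF. A rough round-trip sketch with local record types — the field names follow the patch, but the exact layouts of the real journalGenerator/journalAccount/journalStorage structs are what actually govern the encoding:

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/rlp"
)

// Illustrative local records; not the package's own types.
type generatorRecord struct {
	Wiping   bool
	Done     bool
	Marker   []byte
	Accounts uint64
	Slots    uint64
	Storage  uint64
}

type accountRecord struct {
	Hash common.Hash
	Blob []byte
}

type storageRecord struct {
	Hash common.Hash
	Keys []common.Hash
	Vals [][]byte
}

func main() {
	// Encode: one generator entry for the disk layer, then root + accounts +
	// storage for each diff layer, bottom-most first.
	buf := new(bytes.Buffer)
	rlp.Encode(buf, generatorRecord{Done: true})
	rlp.Encode(buf, common.HexToHash("0x02"))
	rlp.Encode(buf, []accountRecord{{Hash: common.HexToHash("0xaa"), Blob: []byte{0x01}}})
	rlp.Encode(buf, []storageRecord{})

	// Decode it back, stopping at EOF exactly like loadDiffLayer does.
	r := rlp.NewStream(bytes.NewReader(buf.Bytes()), 0)

	var gen generatorRecord
	if err := r.Decode(&gen); err != nil {
		panic(err)
	}
	for {
		var root common.Hash
		if err := r.Decode(&root); err == io.EOF {
			break
		} else if err != nil {
			panic(err)
		}
		var accounts []accountRecord
		var slots []storageRecord
		if err := r.Decode(&accounts); err != nil {
			panic(err)
		}
		if err := r.Decode(&slots); err != nil {
			panic(err)
		}
		fmt.Println("diff layer", root, "accounts", len(accounts), "storage", len(slots))
	}
}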
+func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { // Journal the parent first - writer, base, err := dl.parent.Journal(path) + base, err := dl.parent.Journal(buffer) if err != nil { - return nil, common.Hash{}, err + return common.Hash{}, err } // Ensure the layer didn't get stale dl.lock.RLock() defer dl.lock.RUnlock() if dl.stale { - writer.Close() - return nil, common.Hash{}, ErrSnapshotStale + return common.Hash{}, ErrSnapshotStale } // Everything below was journalled, persist this layer too - buf := bufio.NewWriter(writer) - if err := rlp.Encode(buf, dl.root); err != nil { - buf.Flush() - writer.Close() - return nil, common.Hash{}, err + if err := rlp.Encode(buffer, dl.root); err != nil { + return common.Hash{}, err } accounts := make([]journalAccount, 0, len(dl.accountData)) for hash, blob := range dl.accountData { accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) } - if err := rlp.Encode(buf, accounts); err != nil { - buf.Flush() - writer.Close() - return nil, common.Hash{}, err + if err := rlp.Encode(buffer, accounts); err != nil { + return common.Hash{}, err } storage := make([]journalStorage, 0, len(dl.storageData)) for hash, slots := range dl.storageData { @@ -247,11 +234,8 @@ func (dl *diffLayer) Journal(path string) (io.WriteCloser, common.Hash, error) { } storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) } - if err := rlp.Encode(buf, storage); err != nil { - buf.Flush() - writer.Close() - return nil, common.Hash{}, err + if err := rlp.Encode(buffer, storage); err != nil { + return common.Hash{}, err } - buf.Flush() - return writer, base, nil + return base, nil } diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 744d56c1b..749f61078 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -21,7 +21,6 @@ import ( "bytes" "errors" "fmt" - "io" "sync" "github.com/ethereum/go-ethereum/common" @@ -112,10 +111,10 @@ type snapshot interface { // copying everything. Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer - // Journal commits an entire diff hierarchy to disk into a single journal file. + // Journal commits an entire diff hierarchy to disk into a single journal entry. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). - Journal(path string) (io.WriteCloser, common.Hash, error) + Journal(buffer *bytes.Buffer) (common.Hash, error) // Stale return whether this layer has become stale (was flattened across) or // if it's still live. @@ -146,7 +145,7 @@ type Tree struct { // If the snapshot is missing or inconsistent, the entirety is deleted and will // be reconstructed from scratch based on the tries in the key-value store, on a // background thread. 
-func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, journal string, cache int, root common.Hash) *Tree { +func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) *Tree { // Create a new, empty snapshot tree snap := &Tree{ diskdb: diskdb, @@ -155,7 +154,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, journal string, cach layers: make(map[common.Hash]snapshot), } // Attempt to load a previously persisted snapshot and rebuild one if failed - head, err := loadSnapshot(diskdb, triedb, journal, cache, root) + head, err := loadSnapshot(diskdb, triedb, cache, root) if err != nil { log.Warn("Failed to load snapshot, regenerating", "err", err) snap.Rebuild(root) @@ -401,6 +400,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { // Account was updated, push to disk rawdb.WriteAccountSnapshot(batch, hash, data) base.cache.Set(hash[:], data) + snapshotCleanAccountWriteMeter.Mark(int64(len(data))) if batch.ValueSize() > ethdb.IdealBatchSize { if err := batch.Write(); err != nil { @@ -445,6 +445,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { if len(data) > 0 { rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) base.cache.Set(append(accountHash[:], storageHash[:]...), data) + snapshotCleanStorageWriteMeter.Mark(int64(len(data))) } else { rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) base.cache.Set(append(accountHash[:], storageHash[:]...), nil) @@ -484,13 +485,13 @@ func diffToDisk(bottom *diffLayer) *diskLayer { return res } -// Journal commits an entire diff hierarchy to disk into a single journal file. +// Journal commits an entire diff hierarchy to disk into a single journal entry. // This is meant to be used during shutdown to persist the snapshot without // flattening everything down (bad for reorgs). // // The method returns the root hash of the base layer that needs to be persisted // to disk as a trie too to allow continuing any pending generation op. 
-func (t *Tree) Journal(root common.Hash, path string) (common.Hash, error) { +func (t *Tree) Journal(root common.Hash) (common.Hash, error) { // Retrieve the head snapshot to journal from var snap snapshot snap := t.Snapshot(root) if snap == nil { @@ -500,11 +501,14 @@ func (t *Tree) Journal(root common.Hash, path string) (common.Hash, error) { t.lock.Lock() defer t.lock.Unlock() - writer, base, err := snap.(snapshot).Journal(path) + journal := new(bytes.Buffer) + base, err := snap.(snapshot).Journal(journal) if err != nil { return common.Hash{}, err } - return base, writer.Close() + // Store the journal into the database and return + rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes()) + return base, nil } // Rebuild wipes all available snapshot data from the persistent database and diff --git a/core/state/statedb.go b/core/state/statedb.go index 1528b45aa..b3ea95a46 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -845,8 +845,8 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := s.snaps.Update(root, parent, s.snapAccounts, s.snapStorage); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } - if err := s.snaps.Cap(root, 128); err != nil { - log.Warn("Failed to cap snapshot tree", "root", root, "layers", 128, "err", err) + if err := s.snaps.Cap(root, 127); err != nil { // Persistent layer is 128th, the last available trie + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 127, "err", err) } } s.snap, s.snapAccounts, s.snapStorage = nil, nil, nil From 3ad4335accd08f2160aac489e4e16dceaae695be Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 2 Dec 2019 09:31:07 +0100 Subject: [PATCH 10/28] core/state/snapshot: node behavioural difference on bloom content --- core/state/snapshot/difflayer.go | 19 ++++++++++++++++--- core/state/snapshot/difflayer_test.go | 9 ++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index cf8c47c3e..634118a10 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -20,6 +20,7 @@ import ( "encoding/binary" "fmt" "math" + "math/rand" "sort" "sync" "time" @@ -63,8 +64,20 @@ var ( // bloom filter to keep its size to a minimum (given it's size and maximum // entry count). bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2)) + + // bloomHashesOffset is a runtime constant which determines which part of the + // the account/storage hash the hasher functions looks at, to determine the + // bloom key for an account/slot. This is randomized at init(), so that the + // global population of nodes do not all display the exact same behaviour with + // regards to bloom content + bloomHasherOffset = 0 ) +func init() { + // Init bloomHasherOffset in the range [0:24] (requires 8 bytes) + bloomHasherOffset = rand.Intn(25) +} + // diffLayer represents a collection of modifications made to a state snapshot // after running a block on top. It contains one sorted list for the account trie // and one-one list for each storage tries. 
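A 32-byte hash leaves 25 possible starting positions for an 8-byte window, which is why init() draws the offset with rand.Intn(25). A small self-contained sketch of how the randomized offset feeds the bloom keys, mirroring the Sum64 changes in the next hunk:

package main

import (
	"encoding/binary"
	"fmt"
	"math/rand"

	"github.com/ethereum/go-ethereum/crypto"
)

func main() {
	// Picked once per process, like the patch's init(): every node hashes a
	// different 8-byte slice of the same 32-byte key into its bloom filters.
	offset := rand.Intn(25)

	account := crypto.Keccak256Hash([]byte("some account"))
	slot := crypto.Keccak256Hash([]byte("some slot"))

	// Account bloom key: 8 bytes of the account hash at the node-local offset.
	accountKey := binary.BigEndian.Uint64(account[offset : offset+8])

	// Storage bloom key: XOR of the account and slot hash windows.
	storageKey := binary.BigEndian.Uint64(account[offset:offset+8]) ^
		binary.BigEndian.Uint64(slot[offset:offset+8])

	fmt.Printf("offset=%d account key=%#x storage key=%#x\n", offset, accountKey, storageKey)
}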
@@ -100,7 +113,7 @@ func (h accountBloomHasher) Reset() { panic("not impl func (h accountBloomHasher) BlockSize() int { panic("not implemented") } func (h accountBloomHasher) Size() int { return 8 } func (h accountBloomHasher) Sum64() uint64 { - return binary.BigEndian.Uint64(h[:8]) + return binary.BigEndian.Uint64(h[bloomHasherOffset : bloomHasherOffset+8]) } // storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface @@ -114,7 +127,8 @@ func (h storageBloomHasher) Reset() { panic("not impl func (h storageBloomHasher) BlockSize() int { panic("not implemented") } func (h storageBloomHasher) Size() int { return 8 } func (h storageBloomHasher) Sum64() uint64 { - return binary.BigEndian.Uint64(h[0][:8]) ^ binary.BigEndian.Uint64(h[1][:8]) + return binary.BigEndian.Uint64(h[0][bloomHasherOffset:bloomHasherOffset+8]) ^ + binary.BigEndian.Uint64(h[1][bloomHasherOffset:bloomHasherOffset+8]) } // newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low @@ -205,7 +219,6 @@ func (dl *diffLayer) rebloom(origin *diskLayer) { k := float64(dl.diffed.K()) n := float64(dl.diffed.N()) m := float64(dl.diffed.M()) - snapshotBloomErrorGauge.Update(math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)) } diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 84220e359..7d7b21eb0 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -24,6 +24,7 @@ import ( "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" ) @@ -216,7 +217,7 @@ func BenchmarkSearch(b *testing.B) { layer = fill(layer) } - key := common.Hash{} + key := crypto.Keccak256Hash([]byte{0x13, 0x38}) b.ResetTimer() for i := 0; i < b.N; i++ { layer.AccountRLP(key) @@ -229,10 +230,12 @@ func BenchmarkSearch(b *testing.B) { // BenchmarkSearchSlot-6 100000 14554 ns/op // BenchmarkSearchSlot-6 100000 22254 ns/op (when checking parent root using mutex) // BenchmarkSearchSlot-6 100000 14551 ns/op (when checking parent number using atomic) +// With bloom filter: +// BenchmarkSearchSlot-6 3467835 351 ns/op func BenchmarkSearchSlot(b *testing.B) { // First, we set up 128 diff layers, with 1K items each - accountKey := common.Hash{} - storageKey := common.HexToHash("0x1337") + accountKey := crypto.Keccak256Hash([]byte{0x13, 0x37}) + storageKey := crypto.Keccak256Hash([]byte{0x13, 0x37}) accountRLP := randomAccount() fill := func(parent snapshot) *diffLayer { accounts := make(map[common.Hash][]byte) From 22c494d3996db9d7a09c8e5fcbfd15592b36f57a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 3 Dec 2019 10:00:26 +0200 Subject: [PATCH 11/28] core/state/snapshot: bloom, metrics and prefetcher fixes --- core/state/snapshot/difflayer.go | 36 ++++++++++++++++++++++---------- core/state/snapshot/disklayer.go | 14 +++++++++---- core/state/snapshot/snapshot.go | 7 +++++++ core/state_prefetcher.go | 11 ++++++++-- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 634118a10..05d55a6fa 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -43,9 +43,11 @@ var ( // aggregatorItemLimit is an approximate number of items that will end up // in the agregator layer before it's flushed out to disk. 
A plain account - // weighs around 14B (+hash), a storage slot 32B (+hash), so 50 is a very - // rough average of what we might see. - aggregatorItemLimit = aggregatorMemoryLimit / 55 + // weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot + // 0B (+hash). Slots are mostly set/unset in lockstep, so thet average at + // 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a + // smaller number to be on the safe side. + aggregatorItemLimit = aggregatorMemoryLimit / 42 // bloomTargetError is the target false positive rate when the aggregator // layer is at its fullest. The actual value will probably move around up @@ -269,13 +271,13 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { return dl.origin.AccountRLP(hash) } // The bloom filter hit, start poking in the internal maps - return dl.accountRLP(hash) + return dl.accountRLP(hash, 0) } // accountRLP is an internal version of AccountRLP that skips the bloom filter // checks and uses the internal maps to try and retrieve the data. It's meant // to be used if a higher layer's bloom filter hit already. -func (dl *diffLayer) accountRLP(hash common.Hash) ([]byte, error) { +func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) { dl.lock.RLock() defer dl.lock.RUnlock() @@ -288,13 +290,18 @@ func (dl *diffLayer) accountRLP(hash common.Hash) ([]byte, error) { // deleted, and is a different notion than an unknown account! if data, ok := dl.accountData[hash]; ok { snapshotDirtyAccountHitMeter.Mark(1) - snapshotDirtyAccountReadMeter.Mark(int64(len(data))) + snapshotDirtyAccountHitDepthHist.Update(int64(depth)) + if n := len(data); n > 0 { + snapshotDirtyAccountReadMeter.Mark(int64(n)) + } else { + snapshotDirtyAccountInexMeter.Mark(1) + } snapshotBloomAccountTrueHitMeter.Mark(1) return data, nil } // Account unknown to this diff, resolve from parent if diff, ok := dl.parent.(*diffLayer); ok { - return diff.accountRLP(hash) + return diff.accountRLP(hash, depth+1) } // Failed to resolve through diff layers, mark a bloom error and use the disk snapshotBloomAccountFalseHitMeter.Mark(1) @@ -318,13 +325,13 @@ func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro return dl.origin.Storage(accountHash, storageHash) } // The bloom filter hit, start poking in the internal maps - return dl.storage(accountHash, storageHash) + return dl.storage(accountHash, storageHash, 0) } // storage is an internal version of Storage that skips the bloom filter checks // and uses the internal maps to try and retrieve the data. It's meant to be // used if a higher layer's bloom filter hit already. 
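The nil-versus-missing distinction stressed in the comments is what the new inex meters measure: a key present with a nil value means the entry was deleted in this layer, whereas an absent key means the layer knows nothing and the lookup recurses into the parent with depth+1. A tiny illustration of that map idiom (illustrative only, not code from the patch):

package main

import "fmt"

func main() {
	// Present-but-nil means "deleted here"; absent means "ask the parent".
	layer := map[string][]byte{
		"deleted": nil,
	}

	if data, ok := layer["deleted"]; ok {
		fmt.Println("hit, deleted entry, len:", len(data)) // hit with len 0 -> inex meter
	}
	if _, ok := layer["unknown"]; !ok {
		fmt.Println("miss, defer to the parent layer") // miss -> recurse with depth+1
	}
}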
-func (dl *diffLayer) storage(accountHash, storageHash common.Hash) ([]byte, error) { +func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) { dl.lock.RLock() defer dl.lock.RUnlock() @@ -338,19 +345,26 @@ func (dl *diffLayer) storage(accountHash, storageHash common.Hash) ([]byte, erro if storage, ok := dl.storageData[accountHash]; ok { if storage == nil { snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + snapshotDirtyStorageInexMeter.Mark(1) snapshotBloomStorageTrueHitMeter.Mark(1) return nil, nil } if data, ok := storage[storageHash]; ok { snapshotDirtyStorageHitMeter.Mark(1) - snapshotDirtyStorageReadMeter.Mark(int64(len(data))) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + if n := len(data); n > 0 { + snapshotDirtyStorageReadMeter.Mark(int64(n)) + } else { + snapshotDirtyStorageInexMeter.Mark(1) + } snapshotBloomStorageTrueHitMeter.Mark(1) return data, nil } } // Storage slot unknown to this diff, resolve from parent if diff, ok := dl.parent.(*diffLayer); ok { - return diff.storage(accountHash, storageHash) + return diff.storage(accountHash, storageHash, depth+1) } // Failed to resolve through diff layers, mark a bloom error and use the disk snapshotBloomStorageFalseHitMeter.Mark(1) diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index b1934d273..7c5b3e3e9 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -104,8 +104,11 @@ func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) { dl.cache.Set(hash[:], blob) snapshotCleanAccountMissMeter.Mark(1) - snapshotCleanAccountWriteMeter.Mark(int64(len(blob))) - + if n := len(blob); n > 0 { + snapshotCleanAccountWriteMeter.Mark(int64(n)) + } else { + snapshotCleanAccountInexMeter.Mark(1) + } return blob, nil } @@ -141,8 +144,11 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro dl.cache.Set(key, blob) snapshotCleanStorageMissMeter.Mark(1) - snapshotCleanStorageWriteMeter.Mark(int64(len(blob))) - + if n := len(blob); n > 0 { + snapshotCleanStorageWriteMeter.Mark(int64(n)) + } else { + snapshotCleanStorageInexMeter.Mark(1) + } return blob, nil } diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 749f61078..7650cf2c1 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -34,24 +34,31 @@ import ( var ( snapshotCleanAccountHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil) snapshotCleanAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil) + snapshotCleanAccountInexMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil) snapshotCleanAccountReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil) snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil) snapshotCleanStorageHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil) snapshotCleanStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil) + snapshotCleanStorageInexMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil) snapshotCleanStorageReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil) snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil) snapshotDirtyAccountHitMeter = 
metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil) snapshotDirtyAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil) + snapshotDirtyAccountInexMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil) snapshotDirtyAccountReadMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil) snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil) snapshotDirtyStorageHitMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil) snapshotDirtyStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil) + snapshotDirtyStorageInexMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil) snapshotDirtyStorageReadMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil) snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil) + snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil) snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil) snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil) diff --git a/core/state_prefetcher.go b/core/state_prefetcher.go index bb5db4ced..2624f38db 100644 --- a/core/state_prefetcher.go +++ b/core/state_prefetcher.go @@ -54,6 +54,7 @@ func (p *statePrefetcher) Prefetch(block *types.Block, statedb *state.StateDB, c gaspool = new(GasPool).AddGas(block.GasLimit()) ) // Iterate over and process the individual transactions + byzantium := p.config.IsByzantium(block.Number()) for i, tx := range block.Transactions() { // If block precaching was interrupted, abort if interrupt != nil && atomic.LoadUint32(interrupt) == 1 { @@ -64,9 +65,15 @@ func (p *statePrefetcher) Prefetch(block *types.Block, statedb *state.StateDB, c if err := precacheTransaction(p.config, p.bc, nil, gaspool, statedb, header, tx, cfg); err != nil { return // Ugh, something went horribly wrong, bail out } + // If we're pre-byzantium, pre-load trie nodes for the intermediate root + if !byzantium { + statedb.IntermediateRoot(true) + } + } + // If were post-byzantium, pre-load trie nodes for the final root hash + if byzantium { + statedb.IntermediateRoot(true) } - // All transactions processed, finalize the block to force loading written-only trie paths - statedb.Finalise(true) // TODO(karalabe): should we run this on interrupt too? 
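The prefetcher hunk above swaps the single trailing Finalise for fork-aware IntermediateRoot calls: pre-Byzantium receipts embed a state root after every transaction, so those intermediate trie paths must be pre-loaded per transaction, whereas post-Byzantium only the final root is hashed. A minimal sketch of that control flow, with the error handling and interrupt checks elided:

```
// Sketch of the precaching loop introduced above (not a verbatim copy).
byzantium := p.config.IsByzantium(block.Number())
for _, tx := range block.Transactions() {
	// ... apply tx against statedb via precacheTransaction ...
	_ = tx
	if !byzantium {
		statedb.IntermediateRoot(true) // per-tx roots are needed pre-Byzantium
	}
}
if byzantium {
	statedb.IntermediateRoot(true) // a single final root suffices afterwards
}
```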
} // precacheTransaction attempts to apply a transaction to the given state database From 7e389963014ebd175260b7128873a85e5c74bb7b Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 5 Nov 2019 19:06:37 +0100 Subject: [PATCH 12/28] core/state/snapshot: implement snapshot layer iteration --- core/state/snapshot/difflayer.go | 289 ++++++++++++++++++++ core/state/snapshot/difflayer_test.go | 363 ++++++++++++++++++++++++++ core/state/snapshot/iteration.md | 60 +++++ 3 files changed, 712 insertions(+) create mode 100644 core/state/snapshot/iteration.md diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 05d55a6fa..0d97fbdc8 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -18,6 +18,7 @@ package snapshot import ( "encoding/binary" + "bytes" "fmt" "math" "math/rand" @@ -475,3 +476,291 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { dl.storageList[accountHash] = accountStorageList return accountStorageList } + +type Iterator interface { + // Next steps the iterator forward one element, and returns false if + // the iterator is exhausted + Next() bool + // Key returns the current key + Key() common.Hash + // Seek steps the iterator forward as many elements as needed, so that after + // calling Next(), the iterator will be at a key higher than the given hash + Seek(common.Hash) +} + +func (dl *diffLayer) newIterator() Iterator { + dl.AccountList() + return &dlIterator{dl, -1} +} + +type dlIterator struct { + layer *diffLayer + index int +} + +func (it *dlIterator) Next() bool { + if it.index < len(it.layer.accountList) { + it.index++ + } + return it.index < len(it.layer.accountList) +} + +func (it *dlIterator) Key() common.Hash { + if it.index < len(it.layer.accountList) { + return it.layer.accountList[it.index] + } + return common.Hash{} +} + +func (it *dlIterator) Seek(key common.Hash) { + // Search uses binary search to find and return the smallest index i + // in [0, n) at which f(i) is true + size := len(it.layer.accountList) + index := sort.Search(size, + func(i int) bool { + v := it.layer.accountList[i] + return bytes.Compare(key[:], v[:]) < 0 + }) + it.index = index - 1 +} + +type binaryIterator struct { + a Iterator + b Iterator + aDone bool + bDone bool + k common.Hash +} + +func (dl *diffLayer) newBinaryIterator() Iterator { + parent, ok := dl.parent.(*diffLayer) + if !ok { + // parent is the disk layer + return dl.newIterator() + } + l := &binaryIterator{ + a: dl.newIterator(), + b: parent.newBinaryIterator()} + + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +func (it *binaryIterator) Next() bool { + + if it.aDone && it.bDone { + return false + } + nextB := it.b.Key() +first: + nextA := it.a.Key() + if it.aDone { + it.bDone = !it.b.Next() + it.k = nextB + return true + } + if it.bDone { + it.aDone = !it.a.Next() + it.k = nextA + return true + } + if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 { + it.aDone = !it.a.Next() + it.k = nextA + return true + } else if diff == 0 { + // Now we need to advance one of them + it.aDone = !it.a.Next() + goto first + } + it.bDone = !it.b.Next() + it.k = nextB + return true +} + +func (it *binaryIterator) Key() common.Hash { + return it.k +} +func (it *binaryIterator) Seek(key common.Hash) { + panic("todo: implement") +} + +func (dl *diffLayer) iterators() []Iterator { + if parent, ok := dl.parent.(*diffLayer); ok { + iterators := parent.iterators() + return append(iterators, dl.newIterator()) + } + return 
[]Iterator{dl.newIterator()} +} + +// fastIterator is a more optimized multi-layer iterator which maintains a +// direct mapping of all iterators leading down to the bottom layer +type fastIterator struct { + iterators []Iterator + initiated bool +} + +// Len returns the number of active iterators +func (fi *fastIterator) Len() int { + return len(fi.iterators) +} + +// Less implements sort.Interface +func (fi *fastIterator) Less(i, j int) bool { + a := fi.iterators[i].Key() + b := fi.iterators[j].Key() + return bytes.Compare(a[:], b[:]) < 0 +} + +// Swap implements sort.Interface +func (fi *fastIterator) Swap(i, j int) { + fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i] +} + +// Next implements the Iterator interface. It returns false if no more elemnts +// can be retrieved (false == exhausted) +func (fi *fastIterator) Next() bool { + if len(fi.iterators) == 0 { + return false + } + if !fi.initiated { + // Don't forward first time -- we had to 'Next' once in order to + // do the sorting already + fi.initiated = true + return true + } + return fi.innerNext(0) +} + +// innerNext handles the next operation internally, +// and should be invoked when we know that two elements in the list may have +// the same value. +// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke +// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue +// along the list and apply the same operation if needed +func (fi *fastIterator) innerNext(pos int) bool { + if !fi.iterators[pos].Next() { + //Exhausted, remove this iterator + fi.remove(pos) + if len(fi.iterators) == 0 { + return false + } + return true + } + if pos == len(fi.iterators)-1 { + // Only one iterator left + return true + } + // We next:ed the elem at 'pos'. Now we may have to re-sort that elem + val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() + diff := bytes.Compare(val[:], neighbour[:]) + if diff < 0 { + // It is still in correct place + return true + } + if diff == 0 { + // It has same value as the neighbour. So still in correct place, but + // we need to iterate on the neighbour + fi.innerNext(pos + 1) + return true + } + // At this point, the elem is in the wrong location, but the + // remaining list is sorted. Find out where to move the elem + iterationNeeded := false + index := sort.Search(len(fi.iterators), func(n int) bool { + if n <= pos { + // No need to search 'behind' us + return false + } + if n == len(fi.iterators)-1 { + // Can always place an elem last + return true + } + neighbour := fi.iterators[n+1].Key() + diff := bytes.Compare(val[:], neighbour[:]) + if diff == 0 { + // The elem we're placing it next to has the same value, + // so it's going to need further iteration + iterationNeeded = true + } + return diff < 0 + }) + fi.move(pos, index) + if iterationNeeded { + fi.innerNext(index) + } + return true +} + +// move moves an iterator to another position in the list +func (fi *fastIterator) move(index, newpos int) { + if newpos > len(fi.iterators)-1 { + newpos = len(fi.iterators) - 1 + } + var ( + elem = fi.iterators[index] + middle = fi.iterators[index+1 : newpos+1] + suffix []Iterator + ) + if newpos < len(fi.iterators)-1 { + suffix = fi.iterators[newpos+1:] + } + fi.iterators = append(fi.iterators[:index], middle...) + fi.iterators = append(fi.iterators, elem) + fi.iterators = append(fi.iterators, suffix...) 
+} + +// remove drops an iterator from the list +func (fi *fastIterator) remove(index int) { + fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...) +} + +// Key returns the current key +func (fi *fastIterator) Key() common.Hash { + return fi.iterators[0].Key() +} + +func (fi *fastIterator) Seek(key common.Hash) { + // We need to apply this across all iterators + var seen = make(map[common.Hash]struct{}) + + length := len(fi.iterators) + for i, it := range fi.iterators { + it.Seek(key) + for { + if !it.Next() { + // To be removed + // swap it to the last position for now + fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] + length-- + break + } + v := it.Key() + if _, exist := seen[v]; !exist { + seen[v] = struct{}{} + break + } + } + } + // Now remove those that were placed in the end + fi.iterators = fi.iterators[:length] + // The list is now totally unsorted, need to re-sort the entire list + sort.Sort(fi) + fi.initiated = false +} + +// The fast iterator does not query parents as much. +func (dl *diffLayer) newFastIterator() Iterator { + f := &fastIterator{dl.iterators(), false} + f.Seek(common.Hash{}) + return f +} + +// Debug is a convencience helper during testing +func (fi *fastIterator) Debug() { + for _, it := range fi.iterators { + fmt.Printf(" %v ", it.Key()[31]) + } + fmt.Println() +} diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 7d7b21eb0..5f914f626 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -18,6 +18,7 @@ package snapshot import ( "bytes" + "encoding/binary" "math/big" "math/rand" "testing" @@ -347,3 +348,365 @@ func BenchmarkJournal(b *testing.B) { layer.Journal(new(bytes.Buffer)) } } + +// TestIteratorBasics tests some simple single-layer iteration +func TestIteratorBasics(t *testing.T) { + var ( + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + for i := 0; i < 100; i++ { + h := randomHash() + data := randomAccount() + accounts[h] = data + if rand.Intn(20) < 10 { + accStorage := make(map[common.Hash][]byte) + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[h] = accStorage + } + } + // Add some (identical) layers on top + parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) + it := parent.newIterator() + verifyIterator(t, 100, it) +} + +type testIterator struct { + values []byte +} + +func newTestIterator(values ...byte) *testIterator { + return &testIterator{values} +} +func (ti *testIterator) Next() bool { + ti.values = ti.values[1:] + if len(ti.values) == 0 { + return false + } + return true +} + +func (ti *testIterator) Key() common.Hash { + return common.BytesToHash([]byte{ti.values[0]}) +} + +func (ti *testIterator) Seek(common.Hash) { + panic("implement me") +} + +func TestFastIteratorBasics(t *testing.T) { + type testCase struct { + lists [][]byte + expKeys []byte + } + for i, tc := range []testCase{ + {lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4}, + {7, 14, 15}, {9, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}}, + {lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9}, + {9, 10}, {10, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, + } { + var iterators []Iterator + for _, data := range tc.lists { + iterators = append(iterators, newTestIterator(data...)) + + } + fi := &fastIterator{ + iterators: iterators, + initiated: false, 
+ } + count := 0 + for fi.Next() { + if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got { + t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) + } + count++ + } + } +} + +func verifyIterator(t *testing.T, expCount int, it Iterator) { + var ( + i = 0 + last = common.Hash{} + ) + for it.Next() { + v := it.Key() + if bytes.Compare(last[:], v[:]) >= 0 { + t.Errorf("Wrong order:\n%x \n>=\n%x", last, v) + } + i++ + } + if i != expCount { + t.Errorf("iterator len wrong, expected %d, got %d", expCount, i) + } +} + +// TestIteratorTraversal tests some simple multi-layer iteration +func TestIteratorTraversal(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + // entries in multiple layers should only become output once + parent := newDiffLayer(emptyLayer{}, common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + // single layer iterator + verifyIterator(t, 3, child.newIterator()) + // multi-layered binary iterator + verifyIterator(t, 7, child.newBinaryIterator()) + // multi-layered fast iterator + verifyIterator(t, 7, child.newFastIterator()) +} + +func TestIteratorLargeTraversal(t *testing.T) { + // This testcase is a bit notorious -- all layers contain the exact + // same 200 accounts. + var storage = make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer{}, common.Hash{}, + mkAccounts(200), storage) + child := parent.Update(common.Hash{}, + mkAccounts(200), storage) + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(200), storage) + } + // single layer iterator + verifyIterator(t, 200, child.newIterator()) + // multi-layered binary iterator + verifyIterator(t, 200, child.newBinaryIterator()) + // multi-layered fast iterator + verifyIterator(t, 200, child.newFastIterator()) +} + +// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the exact +// same 200 accounts. That means that we need to process 2000 items, but only +// spit out 200 values eventually. 
+// +//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op +//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op +func BenchmarkIteratorTraversal(b *testing.B) { + + var storage = make(map[common.Hash]map[common.Hash][]byte) + + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer{}, common.Hash{}, + mkAccounts(200), storage) + + child := parent.Update(common.Hash{}, + mkAccounts(200), storage) + + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(200), storage) + + } + // We call this once before the benchmark, so the creation of + // sorted accountlists are not included in the results. + child.newBinaryIterator() + b.Run("binary iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryIterator() + for it.Next() { + got++ + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastIterator() + for it.Next() { + got++ + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) +} + +// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where +// the baselayer is a lot larger than the upper layer. +// +// This is heavy on the binary iterator, which in most cases will have to +// call recursively 100 times for the majority of the values +// +// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op +// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op +func BenchmarkIteratorLargeBaselayer(b *testing.B) { + var storage = make(map[common.Hash]map[common.Hash][]byte) + + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + + parent := newDiffLayer(emptyLayer{}, common.Hash{}, + mkAccounts(2000), storage) + + child := parent.Update(common.Hash{}, + mkAccounts(20), storage) + + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(20), storage) + + } + // We call this once before the benchmark, so the creation of + // sorted accountlists are not included in the results. + child.newBinaryIterator() + b.Run("binary iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) +} + +// TestIteratorFlatting tests what happens when we +// - have a live iterator on child C (parent C1 -> C2 .. CN) +// - flattens C2 all the way into CN +// - continues iterating +// Right now, this "works" simply because the keys do not change -- the +// iterator is not aware that a layer has become stale. 
This naive +// solution probably won't work in the long run, however +func TestIteratorFlattning(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + // entries in multiple layers should only become output once + parent := newDiffLayer(emptyLayer{}, common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + it := child.newFastIterator() + child.parent.(*diffLayer).flatten() + // The parent should now be stale + verifyIterator(t, 7, it) +} + +func TestIteratorSeek(t *testing.T) { + storage := make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer{}, common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + it := parent.newIterator() + // expected: ee, f0, ff + it.Seek(common.HexToHash("0xdd")) + verifyIterator(t, 3, it) + + it = parent.newIterator().(*dlIterator) + // expected: ee, f0, ff + it.Seek(common.HexToHash("0xaa")) + verifyIterator(t, 3, it) + + it = parent.newIterator().(*dlIterator) + // expected: nothing + it.Seek(common.HexToHash("0xff")) + verifyIterator(t, 0, it) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + it = child.newFastIterator() + // expected: cc, dd, ee, f0, ff + it.Seek(common.HexToHash("0xbb")) + verifyIterator(t, 5, it) + + it = child.newFastIterator() + it.Seek(common.HexToHash("0xef")) + // exp: f0, ff + verifyIterator(t, 2, it) + + it = child.newFastIterator() + it.Seek(common.HexToHash("0xf0")) + verifyIterator(t, 1, it) + + it.Seek(common.HexToHash("0xff")) + verifyIterator(t, 0, it) + +} diff --git a/core/state/snapshot/iteration.md b/core/state/snapshot/iteration.md new file mode 100644 index 000000000..ca1962d42 --- /dev/null +++ b/core/state/snapshot/iteration.md @@ -0,0 +1,60 @@ + +## How the fast iterator works + +Consider the following example, where we have `6` iterators, sorted from +left to right in ascending order. + +Our 'primary' `A` iterator is on the left, containing the elements `[0,1,8]` +``` + A B C D E F + + 0 1 2 4 7 9 + 1 2 9 - 14 13 + 8 8 - 15 15 + - - - 16 + - +``` +When we call `Next` on the primary iterator, we get (ignoring the future keys) + +``` +A B C D E F + +1 1 2 4 7 9 +``` +We detect that we now got an equality between our element and the next element. +And we need to continue `Next`ing on the next element + +``` +1 2 2 4 7 9 +``` +And move on: +``` +A B C D E F + +1 2 9 4 7 9 +``` +Now we broke out of the equality, but we need to re-sort the element `C` + +``` +A B D E F C + +1 2 4 7 9 9 +``` + +And after shifting it rightwards, we check equality again, and find `C == F`, and thus +call `Next` on `C` + +``` +A B D E F C + +1 2 4 7 9 - +``` +At this point, `C` was exhausted, and is removed + +``` +A B D E F + +1 2 4 7 9 +``` +And we're done with this step. 
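To make the walkthrough above concrete, here is a minimal sketch of driving the fast iterator from Go. The method names are the ones introduced in this patch; the layer value, the start hash and the process callback are assumptions for the example.

```
// Resume iteration above a given hash: per the interface contract, Seek
// positions the iterator so that the next Next() lands on the first key
// strictly higher than start, with duplicates across layers emitted once.
it := layer.newFastIterator()
it.Seek(start)
for it.Next() {
	process(it.Key())
}
```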
+ From e567675473606cb325c6f51c83b9c5cb0592c8d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Thu, 5 Dec 2019 15:37:25 +0200 Subject: [PATCH 13/28] core/state/snapshot: move iterator out into its own files --- core/state/snapshot/difflayer.go | 289 ------------------ core/state/snapshot/difflayer_test.go | 363 ----------------------- core/state/snapshot/iterator.go | 116 ++++++++ core/state/snapshot/iterator_binary.go | 115 +++++++ core/state/snapshot/iterator_fast.go | 211 +++++++++++++ core/state/snapshot/iterator_test.go | 396 +++++++++++++++++++++++++ 6 files changed, 838 insertions(+), 652 deletions(-) create mode 100644 core/state/snapshot/iterator.go create mode 100644 core/state/snapshot/iterator_binary.go create mode 100644 core/state/snapshot/iterator_fast.go create mode 100644 core/state/snapshot/iterator_test.go diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 0d97fbdc8..05d55a6fa 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -18,7 +18,6 @@ package snapshot import ( "encoding/binary" - "bytes" "fmt" "math" "math/rand" @@ -476,291 +475,3 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { dl.storageList[accountHash] = accountStorageList return accountStorageList } - -type Iterator interface { - // Next steps the iterator forward one element, and returns false if - // the iterator is exhausted - Next() bool - // Key returns the current key - Key() common.Hash - // Seek steps the iterator forward as many elements as needed, so that after - // calling Next(), the iterator will be at a key higher than the given hash - Seek(common.Hash) -} - -func (dl *diffLayer) newIterator() Iterator { - dl.AccountList() - return &dlIterator{dl, -1} -} - -type dlIterator struct { - layer *diffLayer - index int -} - -func (it *dlIterator) Next() bool { - if it.index < len(it.layer.accountList) { - it.index++ - } - return it.index < len(it.layer.accountList) -} - -func (it *dlIterator) Key() common.Hash { - if it.index < len(it.layer.accountList) { - return it.layer.accountList[it.index] - } - return common.Hash{} -} - -func (it *dlIterator) Seek(key common.Hash) { - // Search uses binary search to find and return the smallest index i - // in [0, n) at which f(i) is true - size := len(it.layer.accountList) - index := sort.Search(size, - func(i int) bool { - v := it.layer.accountList[i] - return bytes.Compare(key[:], v[:]) < 0 - }) - it.index = index - 1 -} - -type binaryIterator struct { - a Iterator - b Iterator - aDone bool - bDone bool - k common.Hash -} - -func (dl *diffLayer) newBinaryIterator() Iterator { - parent, ok := dl.parent.(*diffLayer) - if !ok { - // parent is the disk layer - return dl.newIterator() - } - l := &binaryIterator{ - a: dl.newIterator(), - b: parent.newBinaryIterator()} - - l.aDone = !l.a.Next() - l.bDone = !l.b.Next() - return l -} - -func (it *binaryIterator) Next() bool { - - if it.aDone && it.bDone { - return false - } - nextB := it.b.Key() -first: - nextA := it.a.Key() - if it.aDone { - it.bDone = !it.b.Next() - it.k = nextB - return true - } - if it.bDone { - it.aDone = !it.a.Next() - it.k = nextA - return true - } - if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 { - it.aDone = !it.a.Next() - it.k = nextA - return true - } else if diff == 0 { - // Now we need to advance one of them - it.aDone = !it.a.Next() - goto first - } - it.bDone = !it.b.Next() - it.k = nextB - return true -} - -func (it *binaryIterator) Key() common.Hash 
{ - return it.k -} -func (it *binaryIterator) Seek(key common.Hash) { - panic("todo: implement") -} - -func (dl *diffLayer) iterators() []Iterator { - if parent, ok := dl.parent.(*diffLayer); ok { - iterators := parent.iterators() - return append(iterators, dl.newIterator()) - } - return []Iterator{dl.newIterator()} -} - -// fastIterator is a more optimized multi-layer iterator which maintains a -// direct mapping of all iterators leading down to the bottom layer -type fastIterator struct { - iterators []Iterator - initiated bool -} - -// Len returns the number of active iterators -func (fi *fastIterator) Len() int { - return len(fi.iterators) -} - -// Less implements sort.Interface -func (fi *fastIterator) Less(i, j int) bool { - a := fi.iterators[i].Key() - b := fi.iterators[j].Key() - return bytes.Compare(a[:], b[:]) < 0 -} - -// Swap implements sort.Interface -func (fi *fastIterator) Swap(i, j int) { - fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i] -} - -// Next implements the Iterator interface. It returns false if no more elemnts -// can be retrieved (false == exhausted) -func (fi *fastIterator) Next() bool { - if len(fi.iterators) == 0 { - return false - } - if !fi.initiated { - // Don't forward first time -- we had to 'Next' once in order to - // do the sorting already - fi.initiated = true - return true - } - return fi.innerNext(0) -} - -// innerNext handles the next operation internally, -// and should be invoked when we know that two elements in the list may have -// the same value. -// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke -// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue -// along the list and apply the same operation if needed -func (fi *fastIterator) innerNext(pos int) bool { - if !fi.iterators[pos].Next() { - //Exhausted, remove this iterator - fi.remove(pos) - if len(fi.iterators) == 0 { - return false - } - return true - } - if pos == len(fi.iterators)-1 { - // Only one iterator left - return true - } - // We next:ed the elem at 'pos'. Now we may have to re-sort that elem - val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff < 0 { - // It is still in correct place - return true - } - if diff == 0 { - // It has same value as the neighbour. So still in correct place, but - // we need to iterate on the neighbour - fi.innerNext(pos + 1) - return true - } - // At this point, the elem is in the wrong location, but the - // remaining list is sorted. 
Find out where to move the elem - iterationNeeded := false - index := sort.Search(len(fi.iterators), func(n int) bool { - if n <= pos { - // No need to search 'behind' us - return false - } - if n == len(fi.iterators)-1 { - // Can always place an elem last - return true - } - neighbour := fi.iterators[n+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff == 0 { - // The elem we're placing it next to has the same value, - // so it's going to need further iteration - iterationNeeded = true - } - return diff < 0 - }) - fi.move(pos, index) - if iterationNeeded { - fi.innerNext(index) - } - return true -} - -// move moves an iterator to another position in the list -func (fi *fastIterator) move(index, newpos int) { - if newpos > len(fi.iterators)-1 { - newpos = len(fi.iterators) - 1 - } - var ( - elem = fi.iterators[index] - middle = fi.iterators[index+1 : newpos+1] - suffix []Iterator - ) - if newpos < len(fi.iterators)-1 { - suffix = fi.iterators[newpos+1:] - } - fi.iterators = append(fi.iterators[:index], middle...) - fi.iterators = append(fi.iterators, elem) - fi.iterators = append(fi.iterators, suffix...) -} - -// remove drops an iterator from the list -func (fi *fastIterator) remove(index int) { - fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...) -} - -// Key returns the current key -func (fi *fastIterator) Key() common.Hash { - return fi.iterators[0].Key() -} - -func (fi *fastIterator) Seek(key common.Hash) { - // We need to apply this across all iterators - var seen = make(map[common.Hash]struct{}) - - length := len(fi.iterators) - for i, it := range fi.iterators { - it.Seek(key) - for { - if !it.Next() { - // To be removed - // swap it to the last position for now - fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] - length-- - break - } - v := it.Key() - if _, exist := seen[v]; !exist { - seen[v] = struct{}{} - break - } - } - } - // Now remove those that were placed in the end - fi.iterators = fi.iterators[:length] - // The list is now totally unsorted, need to re-sort the entire list - sort.Sort(fi) - fi.initiated = false -} - -// The fast iterator does not query parents as much. 
-func (dl *diffLayer) newFastIterator() Iterator { - f := &fastIterator{dl.iterators(), false} - f.Seek(common.Hash{}) - return f -} - -// Debug is a convencience helper during testing -func (fi *fastIterator) Debug() { - for _, it := range fi.iterators { - fmt.Printf(" %v ", it.Key()[31]) - } - fmt.Println() -} diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 5f914f626..7d7b21eb0 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -18,7 +18,6 @@ package snapshot import ( "bytes" - "encoding/binary" "math/big" "math/rand" "testing" @@ -348,365 +347,3 @@ func BenchmarkJournal(b *testing.B) { layer.Journal(new(bytes.Buffer)) } } - -// TestIteratorBasics tests some simple single-layer iteration -func TestIteratorBasics(t *testing.T) { - var ( - accounts = make(map[common.Hash][]byte) - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - // Fill up a parent - for i := 0; i < 100; i++ { - h := randomHash() - data := randomAccount() - accounts[h] = data - if rand.Intn(20) < 10 { - accStorage := make(map[common.Hash][]byte) - value := make([]byte, 32) - rand.Read(value) - accStorage[randomHash()] = value - storage[h] = accStorage - } - } - // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) - it := parent.newIterator() - verifyIterator(t, 100, it) -} - -type testIterator struct { - values []byte -} - -func newTestIterator(values ...byte) *testIterator { - return &testIterator{values} -} -func (ti *testIterator) Next() bool { - ti.values = ti.values[1:] - if len(ti.values) == 0 { - return false - } - return true -} - -func (ti *testIterator) Key() common.Hash { - return common.BytesToHash([]byte{ti.values[0]}) -} - -func (ti *testIterator) Seek(common.Hash) { - panic("implement me") -} - -func TestFastIteratorBasics(t *testing.T) { - type testCase struct { - lists [][]byte - expKeys []byte - } - for i, tc := range []testCase{ - {lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4}, - {7, 14, 15}, {9, 13, 15, 16}}, - expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}}, - {lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9}, - {9, 10}, {10, 13, 15, 16}}, - expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, - } { - var iterators []Iterator - for _, data := range tc.lists { - iterators = append(iterators, newTestIterator(data...)) - - } - fi := &fastIterator{ - iterators: iterators, - initiated: false, - } - count := 0 - for fi.Next() { - if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got { - t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) - } - count++ - } - } -} - -func verifyIterator(t *testing.T, expCount int, it Iterator) { - var ( - i = 0 - last = common.Hash{} - ) - for it.Next() { - v := it.Key() - if bytes.Compare(last[:], v[:]) >= 0 { - t.Errorf("Wrong order:\n%x \n>=\n%x", last, v) - } - i++ - } - if i != expCount { - t.Errorf("iterator len wrong, expected %d, got %d", expCount, i) - } -} - -// TestIteratorTraversal tests some simple multi-layer iteration -func TestIteratorTraversal(t *testing.T) { - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - // entries in multiple layers should only become output once - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - 
mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - // single layer iterator - verifyIterator(t, 3, child.newIterator()) - // multi-layered binary iterator - verifyIterator(t, 7, child.newBinaryIterator()) - // multi-layered fast iterator - verifyIterator(t, 7, child.newFastIterator()) -} - -func TestIteratorLargeTraversal(t *testing.T) { - // This testcase is a bit notorious -- all layers contain the exact - // same 200 accounts. - var storage = make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts(200), storage) - child := parent.Update(common.Hash{}, - mkAccounts(200), storage) - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(200), storage) - } - // single layer iterator - verifyIterator(t, 200, child.newIterator()) - // multi-layered binary iterator - verifyIterator(t, 200, child.newBinaryIterator()) - // multi-layered fast iterator - verifyIterator(t, 200, child.newFastIterator()) -} - -// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the exact -// same 200 accounts. That means that we need to process 2000 items, but only -// spit out 200 values eventually. -// -//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op -//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op -func BenchmarkIteratorTraversal(b *testing.B) { - - var storage = make(map[common.Hash]map[common.Hash][]byte) - - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts(200), storage) - - child := parent.Update(common.Hash{}, - mkAccounts(200), storage) - - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(200), storage) - - } - // We call this once before the benchmark, so the creation of - // sorted accountlists are not included in the results. - child.newBinaryIterator() - b.Run("binary iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newBinaryIterator() - for it.Next() { - got++ - } - if exp := 200; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) - b.Run("fast iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newFastIterator() - for it.Next() { - got++ - } - if exp := 200; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) -} - -// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where -// the baselayer is a lot larger than the upper layer. 
-// -// This is heavy on the binary iterator, which in most cases will have to -// call recursively 100 times for the majority of the values -// -// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op -// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op -func BenchmarkIteratorLargeBaselayer(b *testing.B) { - var storage = make(map[common.Hash]map[common.Hash][]byte) - - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts(2000), storage) - - child := parent.Update(common.Hash{}, - mkAccounts(20), storage) - - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(20), storage) - - } - // We call this once before the benchmark, so the creation of - // sorted accountlists are not included in the results. - child.newBinaryIterator() - b.Run("binary iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newBinaryIterator() - for it.Next() { - got++ - } - if exp := 2000; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) - b.Run("fast iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newFastIterator() - for it.Next() { - got++ - } - if exp := 2000; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) -} - -// TestIteratorFlatting tests what happens when we -// - have a live iterator on child C (parent C1 -> C2 .. CN) -// - flattens C2 all the way into CN -// - continues iterating -// Right now, this "works" simply because the keys do not change -- the -// iterator is not aware that a layer has become stale. 
This naive -// solution probably won't work in the long run, however -func TestIteratorFlattning(t *testing.T) { - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - // entries in multiple layers should only become output once - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - it := child.newFastIterator() - child.parent.(*diffLayer).flatten() - // The parent should now be stale - verifyIterator(t, 7, it) -} - -func TestIteratorSeek(t *testing.T) { - storage := make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - it := parent.newIterator() - // expected: ee, f0, ff - it.Seek(common.HexToHash("0xdd")) - verifyIterator(t, 3, it) - - it = parent.newIterator().(*dlIterator) - // expected: ee, f0, ff - it.Seek(common.HexToHash("0xaa")) - verifyIterator(t, 3, it) - - it = parent.newIterator().(*dlIterator) - // expected: nothing - it.Seek(common.HexToHash("0xff")) - verifyIterator(t, 0, it) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - it = child.newFastIterator() - // expected: cc, dd, ee, f0, ff - it.Seek(common.HexToHash("0xbb")) - verifyIterator(t, 5, it) - - it = child.newFastIterator() - it.Seek(common.HexToHash("0xef")) - // exp: f0, ff - verifyIterator(t, 2, it) - - it = child.newFastIterator() - it.Seek(common.HexToHash("0xf0")) - verifyIterator(t, 1, it) - - it.Seek(common.HexToHash("0xff")) - verifyIterator(t, 0, it) - -} diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go new file mode 100644 index 000000000..6df7b3147 --- /dev/null +++ b/core/state/snapshot/iterator.go @@ -0,0 +1,116 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "sort" + + "github.com/ethereum/go-ethereum/common" +) + +// AccountIterator is an iterator to step over all the accounts in a snapshot, +// which may or may npt be composed of multiple layers. 
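As a quick orientation for the AccountIterator contract declared immediately below, a small consumption sketch. The helper is purely illustrative and not part of the patch; it only assumes the interface methods defined in this file.

```
// collectAccounts drains an AccountIterator into a map and reports any
// failure (e.g. the snapshot stack going stale) via Error at the end.
func collectAccounts(it AccountIterator) (map[common.Hash][]byte, error) {
	accounts := make(map[common.Hash][]byte)
	for it.Next() {
		accounts[it.Key()] = it.Value()
	}
	return accounts, it.Error()
}
```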
+type AccountIterator interface { + // Seek steps the iterator forward as many elements as needed, so that after + // calling Next(), the iterator will be at a key higher than the given hash. + Seek(hash common.Hash) + + // Next steps the iterator forward one element, returning false if exhausted, + // or an error if iteration failed for some reason (e.g. root being iterated + // becomes stale and garbage collected). + Next() bool + + // Error returns any failure that occurred during iteration, which might have + // caused a premature iteration exit (e.g. snapshot stack becoming stale). + Error() error + + // Key returns the hash of the account the iterator is currently at. + Key() common.Hash + + // Value returns the RLP encoded slim account the iterator is currently at. + // An error will be returned if the iterator becomes invalid (e.g. snaph + Value() []byte +} + +// diffAccountIterator is an account iterator that steps over the accounts (both +// live and deleted) contained within a single +type diffAccountIterator struct { + layer *diffLayer + index int +} + +func (dl *diffLayer) newAccountIterator() *diffAccountIterator { + dl.AccountList() + return &diffAccountIterator{layer: dl, index: -1} +} + +// Seek steps the iterator forward as many elements as needed, so that after +// calling Next(), the iterator will be at a key higher than the given hash. +func (it *diffAccountIterator) Seek(key common.Hash) { + // Search uses binary search to find and return the smallest index i + // in [0, n) at which f(i) is true + index := sort.Search(len(it.layer.accountList), func(i int) bool { + return bytes.Compare(key[:], it.layer.accountList[i][:]) < 0 + }) + it.index = index - 1 +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (it *diffAccountIterator) Next() bool { + if it.index < len(it.layer.accountList) { + it.index++ + } + return it.index < len(it.layer.accountList) +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +// +// A diff layer is immutable after creation content wise and can always be fully +// iterated without error, so this method always returns nil. +func (it *diffAccountIterator) Error() error { + return nil +} + +// Key returns the hash of the account the iterator is currently at. +func (it *diffAccountIterator) Key() common.Hash { + if it.index < len(it.layer.accountList) { + return it.layer.accountList[it.index] + } + return common.Hash{} +} + +// Value returns the RLP encoded slim account the iterator is currently at. +func (it *diffAccountIterator) Value() []byte { + it.layer.lock.RLock() + defer it.layer.lock.RUnlock() + + hash := it.layer.accountList[it.index] + if data, ok := it.layer.accountData[hash]; ok { + return data + } + panic("iterator references non-existent layer account") +} + +func (dl *diffLayer) iterators() []AccountIterator { + if parent, ok := dl.parent.(*diffLayer); ok { + iterators := parent.iterators() + return append(iterators, dl.newAccountIterator()) + } + return []AccountIterator{dl.newAccountIterator()} +} diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go new file mode 100644 index 000000000..7ff6e3337 --- /dev/null +++ b/core/state/snapshot/iterator_binary.go @@ -0,0 +1,115 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// binaryAccountIterator is a simplistic iterator to step over the accounts in +// a snapshot, which may or may npt be composed of multiple layers. Performance +// wise this iterator is slow, it's meant for cross validating the fast one, +type binaryAccountIterator struct { + a *diffAccountIterator + b AccountIterator + aDone bool + bDone bool + k common.Hash + fail error +} + +// newBinaryAccountIterator creates a simplistic account iterator to step over +// all the accounts in a slow, but eaily verifyable way. +func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { + parent, ok := dl.parent.(*diffLayer) + if !ok { + // parent is the disk layer + return dl.newAccountIterator() + } + l := &binaryAccountIterator{ + a: dl.newAccountIterator(), + b: parent.newBinaryAccountIterator(), + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +// Seek steps the iterator forward as many elements as needed, so that after +// calling Next(), the iterator will be at a key higher than the given hash. +func (it *binaryAccountIterator) Seek(key common.Hash) { + panic("todo: implement") +} + +// Next steps the iterator forward one element, returning false if exhausted, +// or an error if iteration failed for some reason (e.g. root being iterated +// becomes stale and garbage collected). +func (it *binaryAccountIterator) Next() bool { + if it.aDone && it.bDone { + return false + } + nextB := it.b.Key() +first: + nextA := it.a.Key() + if it.aDone { + it.bDone = !it.b.Next() + it.k = nextB + return true + } + if it.bDone { + it.aDone = !it.a.Next() + it.k = nextA + return true + } + if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 { + it.aDone = !it.a.Next() + it.k = nextA + return true + } else if diff == 0 { + // Now we need to advance one of them + it.aDone = !it.a.Next() + goto first + } + it.bDone = !it.b.Next() + it.k = nextB + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (it *binaryAccountIterator) Error() error { + return it.fail +} + +// Key returns the hash of the account the iterator is currently at. +func (it *binaryAccountIterator) Key() common.Hash { + return it.k +} + +// Value returns the RLP encoded slim account the iterator is currently at, or +// nil if the iterated snapshot stack became stale (you can check Error after +// to see if it failed or not). 
+func (it *binaryAccountIterator) Value() []byte { + blob, err := it.a.layer.AccountRLP(it.k) + if err != nil { + it.fail = err + return nil + } + return blob +} diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go new file mode 100644 index 000000000..d3f315353 --- /dev/null +++ b/core/state/snapshot/iterator_fast.go @@ -0,0 +1,211 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "fmt" + "sort" + + "github.com/ethereum/go-ethereum/common" +) + +// fastAccountIterator is a more optimized multi-layer iterator which maintains a +// direct mapping of all iterators leading down to the bottom layer +type fastAccountIterator struct { + iterators []AccountIterator + initiated bool + fail error +} + +// The fast iterator does not query parents as much. +func (dl *diffLayer) newFastAccountIterator() AccountIterator { + f := &fastAccountIterator{ + iterators: dl.iterators(), + initiated: false, + } + f.Seek(common.Hash{}) + return f +} + +// Len returns the number of active iterators +func (fi *fastAccountIterator) Len() int { + return len(fi.iterators) +} + +// Less implements sort.Interface +func (fi *fastAccountIterator) Less(i, j int) bool { + a := fi.iterators[i].Key() + b := fi.iterators[j].Key() + return bytes.Compare(a[:], b[:]) < 0 +} + +// Swap implements sort.Interface +func (fi *fastAccountIterator) Swap(i, j int) { + fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i] +} + +func (fi *fastAccountIterator) Seek(key common.Hash) { + // We need to apply this across all iterators + var seen = make(map[common.Hash]struct{}) + + length := len(fi.iterators) + for i, it := range fi.iterators { + it.Seek(key) + for { + if !it.Next() { + // To be removed + // swap it to the last position for now + fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] + length-- + break + } + v := it.Key() + if _, exist := seen[v]; !exist { + seen[v] = struct{}{} + break + } + } + } + // Now remove those that were placed in the end + fi.iterators = fi.iterators[:length] + // The list is now totally unsorted, need to re-sort the entire list + sort.Sort(fi) + fi.initiated = false +} + +// Next implements the Iterator interface. It returns false if no more elemnts +// can be retrieved (false == exhausted) +func (fi *fastAccountIterator) Next() bool { + if len(fi.iterators) == 0 { + return false + } + if !fi.initiated { + // Don't forward first time -- we had to 'Next' once in order to + // do the sorting already + fi.initiated = true + return true + } + return fi.innerNext(0) +} + +// innerNext handles the next operation internally, +// and should be invoked when we know that two elements in the list may have +// the same value. 
+// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke +// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue +// along the list and apply the same operation if needed +func (fi *fastAccountIterator) innerNext(pos int) bool { + if !fi.iterators[pos].Next() { + //Exhausted, remove this iterator + fi.remove(pos) + if len(fi.iterators) == 0 { + return false + } + return true + } + if pos == len(fi.iterators)-1 { + // Only one iterator left + return true + } + // We next:ed the elem at 'pos'. Now we may have to re-sort that elem + val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() + diff := bytes.Compare(val[:], neighbour[:]) + if diff < 0 { + // It is still in correct place + return true + } + if diff == 0 { + // It has same value as the neighbour. So still in correct place, but + // we need to iterate on the neighbour + fi.innerNext(pos + 1) + return true + } + // At this point, the elem is in the wrong location, but the + // remaining list is sorted. Find out where to move the elem + iterationNeeded := false + index := sort.Search(len(fi.iterators), func(n int) bool { + if n <= pos { + // No need to search 'behind' us + return false + } + if n == len(fi.iterators)-1 { + // Can always place an elem last + return true + } + neighbour := fi.iterators[n+1].Key() + diff := bytes.Compare(val[:], neighbour[:]) + if diff == 0 { + // The elem we're placing it next to has the same value, + // so it's going to need further iteration + iterationNeeded = true + } + return diff < 0 + }) + fi.move(pos, index) + if iterationNeeded { + fi.innerNext(index) + } + return true +} + +// move moves an iterator to another position in the list +func (fi *fastAccountIterator) move(index, newpos int) { + if newpos > len(fi.iterators)-1 { + newpos = len(fi.iterators) - 1 + } + var ( + elem = fi.iterators[index] + middle = fi.iterators[index+1 : newpos+1] + suffix []AccountIterator + ) + if newpos < len(fi.iterators)-1 { + suffix = fi.iterators[newpos+1:] + } + fi.iterators = append(fi.iterators[:index], middle...) + fi.iterators = append(fi.iterators, elem) + fi.iterators = append(fi.iterators, suffix...) +} + +// remove drops an iterator from the list +func (fi *fastAccountIterator) remove(index int) { + fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...) +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (fi *fastAccountIterator) Error() error { + return fi.fail +} + +// Key returns the current key +func (fi *fastAccountIterator) Key() common.Hash { + return fi.iterators[0].Key() +} + +// Value returns the current key +func (fi *fastAccountIterator) Value() []byte { + panic("todo") +} + +// Debug is a convencience helper during testing +func (fi *fastAccountIterator) Debug() { + for _, it := range fi.iterators { + fmt.Printf(" %v ", it.Key()[31]) + } + fmt.Println() +} diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go new file mode 100644 index 000000000..597523189 --- /dev/null +++ b/core/state/snapshot/iterator_test.go @@ -0,0 +1,396 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "encoding/binary" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +// TestIteratorBasics tests some simple single-layer iteration +func TestIteratorBasics(t *testing.T) { + var ( + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + for i := 0; i < 100; i++ { + h := randomHash() + data := randomAccount() + accounts[h] = data + if rand.Intn(20) < 10 { + accStorage := make(map[common.Hash][]byte) + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[h] = accStorage + } + } + // Add some (identical) layers on top + parent := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) + it := parent.newAccountIterator() + verifyIterator(t, 100, it) +} + +type testIterator struct { + values []byte +} + +func newTestIterator(values ...byte) *testIterator { + return &testIterator{values} +} + +func (ti *testIterator) Seek(common.Hash) { + panic("implement me") +} + +func (ti *testIterator) Next() bool { + ti.values = ti.values[1:] + if len(ti.values) == 0 { + return false + } + return true +} + +func (ti *testIterator) Error() error { + panic("implement me") +} + +func (ti *testIterator) Key() common.Hash { + return common.BytesToHash([]byte{ti.values[0]}) +} + +func (ti *testIterator) Value() []byte { + panic("implement me") +} + +func TestFastIteratorBasics(t *testing.T) { + type testCase struct { + lists [][]byte + expKeys []byte + } + for i, tc := range []testCase{ + {lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4}, + {7, 14, 15}, {9, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}}, + {lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9}, + {9, 10}, {10, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, + } { + var iterators []AccountIterator + for _, data := range tc.lists { + iterators = append(iterators, newTestIterator(data...)) + + } + fi := &fastAccountIterator{ + iterators: iterators, + initiated: false, + } + count := 0 + for fi.Next() { + if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got { + t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) + } + count++ + } + } +} + +func verifyIterator(t *testing.T, expCount int, it AccountIterator) { + var ( + i = 0 + last = common.Hash{} + ) + for it.Next() { + v := it.Key() + if bytes.Compare(last[:], v[:]) >= 0 { + t.Errorf("Wrong order:\n%x \n>=\n%x", last, v) + } + i++ + } + if i != expCount { + t.Errorf("iterator len wrong, expected %d, got %d", expCount, i) + } +} + +// TestIteratorTraversal tests some simple multi-layer iteration +func TestIteratorTraversal(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := 
make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + // entries in multiple layers should only become output once + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + // single layer iterator + verifyIterator(t, 3, child.newAccountIterator()) + // multi-layered binary iterator + verifyIterator(t, 7, child.newBinaryAccountIterator()) + // multi-layered fast iterator + verifyIterator(t, 7, child.newFastAccountIterator()) +} + +func TestIteratorLargeTraversal(t *testing.T) { + // This testcase is a bit notorious -- all layers contain the exact + // same 200 accounts. + var storage = make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts(200), storage) + child := parent.Update(common.Hash{}, + mkAccounts(200), storage) + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(200), storage) + } + // single layer iterator + verifyIterator(t, 200, child.newAccountIterator()) + // multi-layered binary iterator + verifyIterator(t, 200, child.newBinaryAccountIterator()) + // multi-layered fast iterator + verifyIterator(t, 200, child.newFastAccountIterator()) +} + +// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the exact +// same 200 accounts. That means that we need to process 2000 items, but only +// spit out 200 values eventually. +// +//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op +//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op +func BenchmarkIteratorTraversal(b *testing.B) { + + var storage = make(map[common.Hash]map[common.Hash][]byte) + + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts(200), storage) + + child := parent.Update(common.Hash{}, + mkAccounts(200), storage) + + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(200), storage) + + } + // We call this once before the benchmark, so the creation of + // sorted accountlists are not included in the results. + child.newBinaryAccountIterator() + b.Run("binary iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) +} + +// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where +// the baselayer is a lot larger than the upper layer. 
+// +// This is heavy on the binary iterator, which in most cases will have to +// call recursively 100 times for the majority of the values +// +// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op +// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op +func BenchmarkIteratorLargeBaselayer(b *testing.B) { + var storage = make(map[common.Hash]map[common.Hash][]byte) + + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts(2000), storage) + + child := parent.Update(common.Hash{}, + mkAccounts(20), storage) + + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(20), storage) + + } + // We call this once before the benchmark, so the creation of + // sorted accountlists are not included in the results. + child.newBinaryAccountIterator() + b.Run("binary iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) +} + +// TestIteratorFlatting tests what happens when we +// - have a live iterator on child C (parent C1 -> C2 .. CN) +// - flattens C2 all the way into CN +// - continues iterating +// Right now, this "works" simply because the keys do not change -- the +// iterator is not aware that a layer has become stale. 
This naive +// solution probably won't work in the long run, however +func TestIteratorFlattning(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + // entries in multiple layers should only become output once + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + it := child.newFastAccountIterator() + child.parent.(*diffLayer).flatten() + // The parent should now be stale + verifyIterator(t, 7, it) +} + +func TestIteratorSeek(t *testing.T) { + storage := make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + it := AccountIterator(parent.newAccountIterator()) + // expected: ee, f0, ff + it.Seek(common.HexToHash("0xdd")) + verifyIterator(t, 3, it) + + it = parent.newAccountIterator() + // expected: ee, f0, ff + it.Seek(common.HexToHash("0xaa")) + verifyIterator(t, 3, it) + + it = parent.newAccountIterator() + // expected: nothing + it.Seek(common.HexToHash("0xff")) + verifyIterator(t, 0, it) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + it = child.newFastAccountIterator() + // expected: cc, dd, ee, f0, ff + it.Seek(common.HexToHash("0xbb")) + verifyIterator(t, 5, it) + + it = child.newFastAccountIterator() + it.Seek(common.HexToHash("0xef")) + // exp: f0, ff + verifyIterator(t, 2, it) + + it = child.newFastAccountIterator() + it.Seek(common.HexToHash("0xf0")) + verifyIterator(t, 1, it) + + it.Seek(common.HexToHash("0xff")) + verifyIterator(t, 0, it) +} From e5708353562405684ef7d34602635cc25231ad36 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 6 Dec 2019 23:27:18 +0100 Subject: [PATCH 14/28] core/state/snapshot: implement iterator priority for fast direct data lookup --- core/state/snapshot/iterator_fast.go | 117 +++++++++++------ core/state/snapshot/iterator_test.go | 180 +++++++++++++++++++++++++-- 2 files changed, 250 insertions(+), 47 deletions(-) diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index d3f315353..8df037e9f 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -24,20 +24,27 @@ import ( "github.com/ethereum/go-ethereum/common" ) +type weightedIterator struct { + it AccountIterator + priority int +} + // fastAccountIterator is a more optimized multi-layer iterator which maintains a // direct mapping of all iterators leading down to the bottom layer type fastAccountIterator struct { - iterators []AccountIterator + iterators []*weightedIterator initiated bool fail error } -// The fast iterator does not query parents as much. 
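The weightedIterator introduced here attaches a priority to every per-layer iterator so that, when the same account hash surfaces from several layers, the entry belonging to the layer closest to the chain head shadows the older ones. The intended ordering is by hash first and priority second, roughly as in this sketch (assuming lower numbers mean younger layers):

import (
	"bytes"

	"github.com/ethereum/go-ethereum/common"
)

// less reports whether the (hash, priority) pair i sorts before pair j:
// primarily by account hash, and for equal hashes by layer priority, so the
// younger layer's entry is visited (and returned) first.
func less(hashI, hashJ common.Hash, prioI, prioJ int) bool {
	if diff := bytes.Compare(hashI[:], hashJ[:]); diff != 0 {
		return diff < 0
	}
	return prioI < prioJ
}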
+// newFastAccountIterator creates a new fastAccountIterator func (dl *diffLayer) newFastAccountIterator() AccountIterator { f := &fastAccountIterator{ - iterators: dl.iterators(), initiated: false, } + for i, it := range dl.iterators() { + f.iterators = append(f.iterators, &weightedIterator{it, -i}) + } f.Seek(common.Hash{}) return f } @@ -49,9 +56,17 @@ func (fi *fastAccountIterator) Len() int { // Less implements sort.Interface func (fi *fastAccountIterator) Less(i, j int) bool { - a := fi.iterators[i].Key() - b := fi.iterators[j].Key() - return bytes.Compare(a[:], b[:]) < 0 + a := fi.iterators[i].it.Key() + b := fi.iterators[j].it.Key() + bDiff := bytes.Compare(a[:], b[:]) + if bDiff < 0 { + return true + } + if bDiff > 0 { + return false + } + // keys are equal, sort by iterator priority + return fi.iterators[i].priority < fi.iterators[j].priority } // Swap implements sort.Interface @@ -61,23 +76,42 @@ func (fi *fastAccountIterator) Swap(i, j int) { func (fi *fastAccountIterator) Seek(key common.Hash) { // We need to apply this across all iterators - var seen = make(map[common.Hash]struct{}) + var seen = make(map[common.Hash]int) length := len(fi.iterators) - for i, it := range fi.iterators { - it.Seek(key) + for i := 0; i < len(fi.iterators); i++ { + //for i, it := range fi.iterators { + it := fi.iterators[i] + it.it.Seek(key) for { - if !it.Next() { + if !it.it.Next() { // To be removed // swap it to the last position for now fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] length-- break } - v := it.Key() - if _, exist := seen[v]; !exist { - seen[v] = struct{}{} + v := it.it.Key() + if other, exist := seen[v]; !exist { + seen[v] = i break + } else { + // This whole else-block can be avoided, if we instead + // do an inital priority-sort of the iterators. If we do that, + // then we'll only wind up here if a lower-priority (preferred) iterator + // has the same value, and then we will always just continue. + // However, it costs an extra sort, so it's probably not better + + // One needs to be progressed, use priority to determine which + if fi.iterators[other].priority < it.priority { + // the 'it' should be progressed + continue + } else { + // the 'other' should be progressed - swap them + it = fi.iterators[other] + fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other] + continue + } } } } @@ -110,7 +144,7 @@ func (fi *fastAccountIterator) Next() bool { // innerNext(3), which will call Next on elem 3 (the second '5'). It will continue // along the list and apply the same operation if needed func (fi *fastAccountIterator) innerNext(pos int) bool { - if !fi.iterators[pos].Next() { + if !fi.iterators[pos].it.Next() { //Exhausted, remove this iterator fi.remove(pos) if len(fi.iterators) == 0 { @@ -123,23 +157,23 @@ func (fi *fastAccountIterator) innerNext(pos int) bool { return true } // We next:ed the elem at 'pos'. Now we may have to re-sort that elem - val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff < 0 { + var ( + current, neighbour = fi.iterators[pos], fi.iterators[pos+1] + val, neighbourVal = current.it.Key(), neighbour.it.Key() + ) + if diff := bytes.Compare(val[:], neighbourVal[:]); diff < 0 { // It is still in correct place return true - } - if diff == 0 { - // It has same value as the neighbour. 
So still in correct place, but - // we need to iterate on the neighbour + } else if diff == 0 && current.priority < neighbour.priority { + // So still in correct place, but we need to iterate on the neighbour fi.innerNext(pos + 1) return true } // At this point, the elem is in the wrong location, but the // remaining list is sorted. Find out where to move the elem - iterationNeeded := false + iteratee := -1 index := sort.Search(len(fi.iterators), func(n int) bool { - if n <= pos { + if n < pos { // No need to search 'behind' us return false } @@ -147,18 +181,29 @@ func (fi *fastAccountIterator) innerNext(pos int) bool { // Can always place an elem last return true } - neighbour := fi.iterators[n+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff == 0 { - // The elem we're placing it next to has the same value, - // so it's going to need further iteration - iterationNeeded = true + neighbour := fi.iterators[n+1].it.Key() + if diff := bytes.Compare(val[:], neighbour[:]); diff < 0 { + return true + } else if diff > 0 { + return false } - return diff < 0 + // The elem we're placing it next to has the same value, + // so whichever winds up on n+1 will need further iteraton + iteratee = n + 1 + if current.priority < fi.iterators[n+1].priority { + // We can drop the iterator here + return true + } + // We need to move it one step further + return false + // TODO benchmark which is best, this works too: + //iteratee = n + //return true + // Doing so should finish the current search earlier }) fi.move(pos, index) - if iterationNeeded { - fi.innerNext(index) + if iteratee != -1 { + fi.innerNext(iteratee) } return true } @@ -171,7 +216,7 @@ func (fi *fastAccountIterator) move(index, newpos int) { var ( elem = fi.iterators[index] middle = fi.iterators[index+1 : newpos+1] - suffix []AccountIterator + suffix []*weightedIterator ) if newpos < len(fi.iterators)-1 { suffix = fi.iterators[newpos+1:] @@ -194,18 +239,18 @@ func (fi *fastAccountIterator) Error() error { // Key returns the current key func (fi *fastAccountIterator) Key() common.Hash { - return fi.iterators[0].Key() + return fi.iterators[0].it.Key() } // Value returns the current key func (fi *fastAccountIterator) Value() []byte { - panic("todo") + return fi.iterators[0].it.Value() } // Debug is a convencience helper during testing func (fi *fastAccountIterator) Debug() { for _, it := range fi.iterators { - fmt.Printf(" %v ", it.Key()[31]) + fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Key()[0]) } fmt.Println() } diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index 597523189..01e525653 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "encoding/binary" + "fmt" "math/rand" "testing" @@ -95,9 +96,10 @@ func TestFastIteratorBasics(t *testing.T) { {9, 10}, {10, 13, 15, 16}}, expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, } { - var iterators []AccountIterator - for _, data := range tc.lists { - iterators = append(iterators, newTestIterator(data...)) + var iterators []*weightedIterator + for i, data := range tc.lists { + it := newTestIterator(data...) 
+ iterators = append(iterators, &weightedIterator{it, i}) } fi := &fastAccountIterator{ @@ -162,6 +164,69 @@ func TestIteratorTraversal(t *testing.T) { verifyIterator(t, 7, child.newFastAccountIterator()) } +// TestIteratorTraversalValues tests some multi-layer iteration, where we +// also expect the correct values to show up +func TestIteratorTraversalValues(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + a = make(map[common.Hash][]byte) + b = make(map[common.Hash][]byte) + c = make(map[common.Hash][]byte) + d = make(map[common.Hash][]byte) + e = make(map[common.Hash][]byte) + f = make(map[common.Hash][]byte) + g = make(map[common.Hash][]byte) + h = make(map[common.Hash][]byte) + ) + // entries in multiple layers should only become output once + for i := byte(2); i < 0xff; i++ { + a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i)) + if i > 20 && i%2 == 0 { + b[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 1, i)) + } + if i%4 == 0 { + c[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 2, i)) + } + if i%7 == 0 { + d[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 3, i)) + } + if i%8 == 0 { + e[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 4, i)) + } + if i > 50 || i < 85 { + f[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 5, i)) + } + if i%64 == 0 { + g[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 6, i)) + } + if i%128 == 0 { + h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i)) + } + } + child := newDiffLayer(emptyLayer(), common.Hash{}, a, storage). + Update(common.Hash{}, b, storage). + Update(common.Hash{}, c, storage). + Update(common.Hash{}, d, storage). + Update(common.Hash{}, e, storage). + Update(common.Hash{}, f, storage). + Update(common.Hash{}, g, storage). + Update(common.Hash{}, h, storage) + + it := child.newFastAccountIterator() + for it.Next() { + key := it.Key() + exp, err := child.accountRLP(key, 0) + if err != nil { + t.Fatal(err) + } + got := it.Value() + if !bytes.Equal(exp, got) { + t.Fatalf("Error on key %x, got %v exp %v", key, string(got), string(exp)) + } + //fmt.Printf("val: %v\n", string(it.Value())) + } +} + func TestIteratorLargeTraversal(t *testing.T) { // This testcase is a bit notorious -- all layers contain the exact // same 200 accounts. @@ -195,8 +260,14 @@ func TestIteratorLargeTraversal(t *testing.T) { // same 200 accounts. That means that we need to process 2000 items, but only // spit out 200 values eventually. // -//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op -//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op +// The value-fetching benchmark is easy on the binary iterator, since it never has to reach +// down at any depth for retrieving the values -- all are on the toppmost layer +// +// BenchmarkIteratorTraversal/binary_iterator_keys-6 2239 483674 ns/op +// BenchmarkIteratorTraversal/binary_iterator_values-6 2403 501810 ns/op +// BenchmarkIteratorTraversal/fast_iterator_keys-6 1923 677966 ns/op +// BenchmarkIteratorTraversal/fast_iterator_values-6 1741 649967 ns/op +// func BenchmarkIteratorTraversal(b *testing.B) { var storage = make(map[common.Hash]map[common.Hash][]byte) @@ -224,7 +295,7 @@ func BenchmarkIteratorTraversal(b *testing.B) { // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. 
child.newBinaryAccountIterator() - b.Run("binary iterator", func(b *testing.B) { + b.Run("binary iterator keys", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newBinaryAccountIterator() @@ -236,7 +307,20 @@ func BenchmarkIteratorTraversal(b *testing.B) { } } }) - b.Run("fast iterator", func(b *testing.B) { + b.Run("binary iterator values", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + child.accountRLP(it.Key(), 0) + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator keys", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newFastAccountIterator() @@ -248,6 +332,19 @@ func BenchmarkIteratorTraversal(b *testing.B) { } } }) + b.Run("fast iterator values", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + it.Value() + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) } // BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where @@ -256,8 +353,10 @@ func BenchmarkIteratorTraversal(b *testing.B) { // This is heavy on the binary iterator, which in most cases will have to // call recursively 100 times for the majority of the values // -// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op -// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op +// BenchmarkIteratorLargeBaselayer/binary_iterator_(keys)-6 514 1971999 ns/op +// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op +// BenchmarkIteratorLargeBaselayer/binary_iterator_(values)-6 61 18997492 ns/op +// BenchmarkIteratorLargeBaselayer/fast_iterator_(values)-6 4047 296823 ns/op func BenchmarkIteratorLargeBaselayer(b *testing.B) { var storage = make(map[common.Hash]map[common.Hash][]byte) @@ -285,7 +384,7 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. 
child.newBinaryAccountIterator() - b.Run("binary iterator", func(b *testing.B) { + b.Run("binary iterator (keys)", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newBinaryAccountIterator() @@ -297,7 +396,7 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { } } }) - b.Run("fast iterator", func(b *testing.B) { + b.Run("fast iterator (keys)", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newFastAccountIterator() @@ -309,6 +408,34 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { } } }) + b.Run("binary iterator (values)", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + v := it.Key() + child.accountRLP(v, -0) + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + + b.Run("fast iterator (values)", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + it.Value() + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) } // TestIteratorFlatting tests what happens when we @@ -394,3 +521,34 @@ func TestIteratorSeek(t *testing.T) { it.Seek(common.HexToHash("0xff")) verifyIterator(t, 0, it) } + +//BenchmarkIteratorSeek/init+seek-6 4328 245477 ns/op +func BenchmarkIteratorSeek(b *testing.B) { + + var storage = make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + layer := newDiffLayer(emptyLayer(), common.Hash{}, mkAccounts(200), storage) + for i := 1; i < 100; i++ { + layer = layer.Update(common.Hash{}, + mkAccounts(200), storage) + } + b.Run("init+seek", func(b *testing.B) { + b.ResetTimer() + seekpos := make([]byte, 20) + for i := 0; i < b.N; i++ { + b.StopTimer() + rand.Read(seekpos) + it := layer.newFastAccountIterator() + b.StartTimer() + it.Seek(common.BytesToHash(seekpos)) + } + }) +} From 6ddb92a089c7b07f512a1236a69b1cd568a660b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 10 Dec 2019 11:00:03 +0200 Subject: [PATCH 15/28] core/state/snapshot: full featured account iteration --- core/state/snapshot/difflayer.go | 80 ++-- core/state/snapshot/difflayer_test.go | 14 - core/state/snapshot/disklayer.go | 5 + core/state/snapshot/iterator.go | 188 ++++++--- core/state/snapshot/iterator_binary.go | 28 +- core/state/snapshot/iterator_fast.go | 272 ++++++------ core/state/snapshot/iterator_test.go | 559 ++++++++++++++----------- core/state/snapshot/snapshot.go | 31 +- core/state/snapshot/snapshot_test.go | 55 ++- core/state/snapshot/wipe_test.go | 9 - 10 files changed, 717 insertions(+), 524 deletions(-) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 05d55a6fa..855d862de 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -229,6 +229,11 @@ func (dl *diffLayer) Root() common.Hash { return dl.root } +// Parent returns the subsequent layer of a diff layer. +func (dl *diffLayer) Parent() snapshot { + return dl.parent +} + // Stale return whether this layer has become stale (was flattened across) or if // it's still live. 
func (dl *diffLayer) Stale() bool { @@ -405,7 +410,7 @@ func (dl *diffLayer) flatten() snapshot { for hash, data := range dl.accountData { parent.accountData[hash] = data } - // Overwrite all the updates storage slots (individually) + // Overwrite all the updated storage slots (individually) for accountHash, storage := range dl.storageData { // If storage didn't exist (or was deleted) in the parent; or if the storage // was freshly deleted in the child, overwrite blindly @@ -425,53 +430,62 @@ func (dl *diffLayer) flatten() snapshot { parent: parent.parent, origin: parent.origin, root: dl.root, - storageList: parent.storageList, - storageData: parent.storageData, - accountList: parent.accountList, accountData: parent.accountData, + storageData: parent.storageData, + storageList: make(map[common.Hash][]common.Hash), diffed: dl.diffed, memory: parent.memory + dl.memory, } } -// AccountList returns a sorted list of all accounts in this difflayer. +// AccountList returns a sorted list of all accounts in this difflayer, including +// the deleted ones. +// +// Note, the returned slice is not a copy, so do not modify it. func (dl *diffLayer) AccountList() []common.Hash { + // If an old list already exists, return it + dl.lock.RLock() + list := dl.accountList + dl.lock.RUnlock() + + if list != nil { + return list + } + // No old sorted account list exists, generate a new one dl.lock.Lock() defer dl.lock.Unlock() - if dl.accountList != nil { - return dl.accountList + + dl.accountList = make([]common.Hash, 0, len(dl.accountData)) + for hash := range dl.accountData { + dl.accountList = append(dl.accountList, hash) } - accountList := make([]common.Hash, len(dl.accountData)) - i := 0 - for k, _ := range dl.accountData { - accountList[i] = k - i++ - // This would be a pretty good opportunity to also - // calculate the size, if we want to - } - sort.Sort(hashes(accountList)) - dl.accountList = accountList + sort.Sort(hashes(dl.accountList)) return dl.accountList } -// StorageList returns a sorted list of all storage slot hashes -// in this difflayer for the given account. +// StorageList returns a sorted list of all storage slot hashes in this difflayer +// for the given account. +// +// Note, the returned slice is not a copy, so do not modify it. 
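The rewritten AccountList follows a read-mostly caching pattern: a cheap read-locked fast path returns an already generated list, and the write-locked slow path builds and caches it once. In isolation the pattern looks roughly like this (a sketch with a hypothetical sortedCache type, not the patch's code; the extra re-check under the write lock is a common hardening step rather than something the patch does):

import (
	"bytes"
	"sort"
	"sync"

	"github.com/ethereum/go-ethereum/common"
)

// sortedCache lazily materialises a sorted key list from a map, mirroring the
// locking discipline of AccountList above.
type sortedCache struct {
	lock sync.RWMutex
	data map[common.Hash][]byte
	keys []common.Hash
}

func (c *sortedCache) List() []common.Hash {
	c.lock.RLock()
	keys := c.keys
	c.lock.RUnlock()

	if keys != nil {
		return keys
	}
	c.lock.Lock()
	defer c.lock.Unlock()

	if c.keys != nil { // built by a concurrent caller while we waited
		return c.keys
	}
	c.keys = make([]common.Hash, 0, len(c.data))
	for hash := range c.data {
		c.keys = append(c.keys, hash)
	}
	sort.Slice(c.keys, func(i, j int) bool {
		return bytes.Compare(c.keys[i][:], c.keys[j][:]) < 0
	})
	return c.keys
}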
func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { + // If an old list already exists, return it + dl.lock.RLock() + list := dl.storageList[accountHash] + dl.lock.RUnlock() + + if list != nil { + return list + } + // No old sorted account list exists, generate a new one dl.lock.Lock() defer dl.lock.Unlock() - if dl.storageList[accountHash] != nil { - return dl.storageList[accountHash] + + storageMap := dl.storageData[accountHash] + storageList := make([]common.Hash, 0, len(storageMap)) + for k, _ := range storageMap { + storageList = append(storageList, k) } - accountStorageMap := dl.storageData[accountHash] - accountStorageList := make([]common.Hash, len(accountStorageMap)) - i := 0 - for k, _ := range accountStorageMap { - accountStorageList[i] = k - i++ - // This would be a pretty good opportunity to also - // calculate the size, if we want to - } - sort.Sort(hashes(accountStorageList)) - dl.storageList[accountHash] = accountStorageList - return accountStorageList + sort.Sort(hashes(storageList)) + dl.storageList[accountHash] = storageList + return storageList } diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 7d7b21eb0..80a9b4093 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -18,7 +18,6 @@ package snapshot import ( "bytes" - "math/big" "math/rand" "testing" @@ -26,21 +25,8 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb/memorydb" - "github.com/ethereum/go-ethereum/rlp" ) -func randomAccount() []byte { - root := randomHash() - a := Account{ - Balance: big.NewInt(rand.Int63()), - Nonce: rand.Uint64(), - Root: root[:], - CodeHash: emptyCode[:], - } - data, _ := rlp.EncodeToBytes(a) - return data -} - // TestMergeBasics tests some simple merges func TestMergeBasics(t *testing.T) { var ( diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 7c5b3e3e9..0c4c3deb1 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -48,6 +48,11 @@ func (dl *diskLayer) Root() common.Hash { return dl.root } +// Parent always returns nil as there's no layer below the disk. +func (dl *diskLayer) Parent() snapshot { + return nil +} + // Stale return whether this layer has become stale (was flattened across) or if // it's still live. func (dl *diskLayer) Stale() bool { diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go index 6df7b3147..4005cb3ca 100644 --- a/core/state/snapshot/iterator.go +++ b/core/state/snapshot/iterator.go @@ -18,18 +18,17 @@ package snapshot import ( "bytes" + "fmt" "sort" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" ) // AccountIterator is an iterator to step over all the accounts in a snapshot, // which may or may npt be composed of multiple layers. type AccountIterator interface { - // Seek steps the iterator forward as many elements as needed, so that after - // calling Next(), the iterator will be at a key higher than the given hash. - Seek(hash common.Hash) - // Next steps the iterator forward one element, returning false if exhausted, // or an error if iteration failed for some reason (e.g. root being iterated // becomes stale and garbage collected). @@ -39,43 +38,133 @@ type AccountIterator interface { // caused a premature iteration exit (e.g. snapshot stack becoming stale). 
Error() error - // Key returns the hash of the account the iterator is currently at. - Key() common.Hash + // Hash returns the hash of the account the iterator is currently at. + Hash() common.Hash - // Value returns the RLP encoded slim account the iterator is currently at. + // Account returns the RLP encoded slim account the iterator is currently at. // An error will be returned if the iterator becomes invalid (e.g. snaph - Value() []byte + Account() []byte + + // Release releases associated resources. Release should always succeed and + // can be called multiple times without causing error. + Release() } // diffAccountIterator is an account iterator that steps over the accounts (both -// live and deleted) contained within a single +// live and deleted) contained within a single diff layer. Higher order iterators +// will use the deleted accounts to skip deeper iterators. type diffAccountIterator struct { - layer *diffLayer - index int + // curHash is the current hash the iterator is positioned on. The field is + // explicitly tracked since the referenced diff layer might go stale after + // the iterator was positioned and we don't want to fail accessing the old + // hash as long as the iterator is not touched any more. + curHash common.Hash + + // curAccount is the current value the iterator is positioned on. The field + // is explicitly tracked since the referenced diff layer might go stale after + // the iterator was positioned and we don't want to fail accessing the old + // value as long as the iterator is not touched any more. + curAccount []byte + + layer *diffLayer // Live layer to retrieve values from + keys []common.Hash // Keys left in the layer to iterate + fail error // Any failures encountered (stale) } -func (dl *diffLayer) newAccountIterator() *diffAccountIterator { - dl.AccountList() - return &diffAccountIterator{layer: dl, index: -1} -} - -// Seek steps the iterator forward as many elements as needed, so that after -// calling Next(), the iterator will be at a key higher than the given hash. -func (it *diffAccountIterator) Seek(key common.Hash) { - // Search uses binary search to find and return the smallest index i - // in [0, n) at which f(i) is true - index := sort.Search(len(it.layer.accountList), func(i int) bool { - return bytes.Compare(key[:], it.layer.accountList[i][:]) < 0 +// AccountIterator creates an account iterator over a single diff layer. +func (dl *diffLayer) AccountIterator(seek common.Hash) AccountIterator { + // Seek out the requested starting account + hashes := dl.AccountList() + index := sort.Search(len(hashes), func(i int) bool { + return bytes.Compare(seek[:], hashes[i][:]) < 0 }) - it.index = index - 1 + // Assemble and returned the already seeked iterator + return &diffAccountIterator{ + layer: dl, + keys: hashes[index:], + } } // Next steps the iterator forward one element, returning false if exhausted. func (it *diffAccountIterator) Next() bool { - if it.index < len(it.layer.accountList) { - it.index++ + // If the iterator was already stale, consider it a programmer error. Although + // we could just return false here, triggering this path would probably mean + // somebody forgot to check for Error, so lets blow up instead of undefined + // behavior that's hard to debug. 
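Seeking within a diff layer reduces to a binary search over the already sorted account list: everything at or below the seek hash is skipped and iteration resumes from the first strictly larger entry, exactly as the sort.Search call above does. The positioning step on its own (a sketch over a sorted []common.Hash):

import (
	"bytes"
	"sort"

	"github.com/ethereum/go-ethereum/common"
)

// seekIndex returns the index of the first hash strictly larger than seek in
// an ascending-sorted slice, i.e. the position iteration should resume from.
func seekIndex(sorted []common.Hash, seek common.Hash) int {
	return sort.Search(len(sorted), func(i int) bool {
		return bytes.Compare(seek[:], sorted[i][:]) < 0
	})
}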
+ if it.fail != nil { + panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail)) } - return it.index < len(it.layer.accountList) + // Stop iterating if all keys were exhausted + if len(it.keys) == 0 { + return false + } + // Iterator seems to be still alive, retrieve and cache the live hash and + // account value, or fail now if layer became stale + it.layer.lock.RLock() + defer it.layer.lock.RUnlock() + + if it.layer.stale { + it.fail, it.keys = ErrSnapshotStale, nil + return false + } + it.curHash = it.keys[0] + if blob, ok := it.layer.accountData[it.curHash]; !ok { + panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash)) + } else { + it.curAccount = blob + } + // Values cached, shift the iterator and notify the user of success + it.keys = it.keys[1:] + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (it *diffAccountIterator) Error() error { + return it.fail +} + +// Hash returns the hash of the account the iterator is currently at. +func (it *diffAccountIterator) Hash() common.Hash { + return it.curHash +} + +// Account returns the RLP encoded slim account the iterator is currently at. +func (it *diffAccountIterator) Account() []byte { + return it.curAccount +} + +// Release is a noop for diff account iterators as there are no held resources. +func (it *diffAccountIterator) Release() {} + +// diskAccountIterator is an account iterator that steps over the live accounts +// contained within a disk layer. +type diskAccountIterator struct { + layer *diskLayer + it ethdb.Iterator +} + +// AccountIterator creates an account iterator over a disk layer. +func (dl *diskLayer) AccountIterator(seek common.Hash) AccountIterator { + return &diskAccountIterator{ + layer: dl, + it: dl.diskdb.NewIteratorWithPrefix(append(rawdb.SnapshotAccountPrefix, seek[:]...)), + } +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (it *diskAccountIterator) Next() bool { + // If the iterator was already exhausted, don't bother + if it.it == nil { + return false + } + // Try to advance the iterator and release it if we reahed the end + if !it.it.Next() || !bytes.HasPrefix(it.it.Key(), rawdb.SnapshotAccountPrefix) { + it.it.Release() + it.it = nil + return false + } + return true } // Error returns any failure that occurred during iteration, which might have @@ -83,34 +172,25 @@ func (it *diffAccountIterator) Next() bool { // // A diff layer is immutable after creation content wise and can always be fully // iterated without error, so this method always returns nil. -func (it *diffAccountIterator) Error() error { - return nil +func (it *diskAccountIterator) Error() error { + return it.it.Error() } -// Key returns the hash of the account the iterator is currently at. -func (it *diffAccountIterator) Key() common.Hash { - if it.index < len(it.layer.accountList) { - return it.layer.accountList[it.index] +// Hash returns the hash of the account the iterator is currently at. +func (it *diskAccountIterator) Hash() common.Hash { + return common.BytesToHash(it.it.Key()) +} + +// Account returns the RLP encoded slim account the iterator is currently at. +func (it *diskAccountIterator) Account() []byte { + return it.it.Value() +} + +// Release releases the database snapshot held during iteration. 
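With Hash, Account and Release in place, the diff-layer and disk-layer iterators are consumed through the same loop shape; Release matters mostly for the disk variant, which holds a live database iterator. A hedged usage sketch (the helper name is illustrative):

// drain walks an AccountIterator to exhaustion, returning the number of
// accounts it visited. Release is deferred so the underlying database
// iterator (for the disk layer) is always freed.
func drain(it AccountIterator) (int, error) {
	defer it.Release()

	count := 0
	for it.Next() {
		_ = it.Hash()    // account hash the iterator is positioned on
		_ = it.Account() // RLP encoded slim account data
		count++
	}
	return count, it.Error()
}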
+func (it *diskAccountIterator) Release() { + // The iterator is auto-released on exhaustion, so make sure it's still alive + if it.it != nil { + it.it.Release() + it.it = nil } - return common.Hash{} -} - -// Value returns the RLP encoded slim account the iterator is currently at. -func (it *diffAccountIterator) Value() []byte { - it.layer.lock.RLock() - defer it.layer.lock.RUnlock() - - hash := it.layer.accountList[it.index] - if data, ok := it.layer.accountData[hash]; ok { - return data - } - panic("iterator references non-existent layer account") -} - -func (dl *diffLayer) iterators() []AccountIterator { - if parent, ok := dl.parent.(*diffLayer); ok { - iterators := parent.iterators() - return append(iterators, dl.newAccountIterator()) - } - return []AccountIterator{dl.newAccountIterator()} } diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go index 7ff6e3337..39288e6fb 100644 --- a/core/state/snapshot/iterator_binary.go +++ b/core/state/snapshot/iterator_binary.go @@ -40,10 +40,10 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { parent, ok := dl.parent.(*diffLayer) if !ok { // parent is the disk layer - return dl.newAccountIterator() + return dl.AccountIterator(common.Hash{}) } l := &binaryAccountIterator{ - a: dl.newAccountIterator(), + a: dl.AccountIterator(common.Hash{}).(*diffAccountIterator), b: parent.newBinaryAccountIterator(), } l.aDone = !l.a.Next() @@ -51,12 +51,6 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { return l } -// Seek steps the iterator forward as many elements as needed, so that after -// calling Next(), the iterator will be at a key higher than the given hash. -func (it *binaryAccountIterator) Seek(key common.Hash) { - panic("todo: implement") -} - // Next steps the iterator forward one element, returning false if exhausted, // or an error if iteration failed for some reason (e.g. root being iterated // becomes stale and garbage collected). @@ -64,9 +58,9 @@ func (it *binaryAccountIterator) Next() bool { if it.aDone && it.bDone { return false } - nextB := it.b.Key() + nextB := it.b.Hash() first: - nextA := it.a.Key() + nextA := it.a.Hash() if it.aDone { it.bDone = !it.b.Next() it.k = nextB @@ -97,15 +91,15 @@ func (it *binaryAccountIterator) Error() error { return it.fail } -// Key returns the hash of the account the iterator is currently at. -func (it *binaryAccountIterator) Key() common.Hash { +// Hash returns the hash of the account the iterator is currently at. +func (it *binaryAccountIterator) Hash() common.Hash { return it.k } -// Value returns the RLP encoded slim account the iterator is currently at, or +// Account returns the RLP encoded slim account the iterator is currently at, or // nil if the iterated snapshot stack became stale (you can check Error after // to see if it failed or not). -func (it *binaryAccountIterator) Value() []byte { +func (it *binaryAccountIterator) Account() []byte { blob, err := it.a.layer.AccountRLP(it.k) if err != nil { it.fail = err @@ -113,3 +107,9 @@ func (it *binaryAccountIterator) Value() []byte { } return blob } + +// Release recursively releases all the iterators in the stack. 
+func (it *binaryAccountIterator) Release() { + it.a.Release() + it.b.Release() +} diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index 8df037e9f..676a3af17 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -24,90 +24,121 @@ import ( "github.com/ethereum/go-ethereum/common" ) -type weightedIterator struct { +// weightedAccountIterator is an account iterator with an assigned weight. It is +// used to prioritise which account is the correct one if multiple iterators find +// the same one (modified in multiple consecutive blocks). +type weightedAccountIterator struct { it AccountIterator priority int } +// weightedAccountIterators is a set of iterators implementing the sort.Interface. +type weightedAccountIterators []*weightedAccountIterator + +// Len implements sort.Interface, returning the number of active iterators. +func (its weightedAccountIterators) Len() int { return len(its) } + +// Less implements sort.Interface, returning which of two iterators in the stack +// is before the other. +func (its weightedAccountIterators) Less(i, j int) bool { + // Order the iterators primarilly by the account hashes + hashI := its[i].it.Hash() + hashJ := its[j].it.Hash() + + switch bytes.Compare(hashI[:], hashJ[:]) { + case -1: + return true + case 1: + return false + } + // Same account in multiple layers, split by priority + return its[i].priority < its[j].priority +} + +// Swap implements sort.Interface, swapping two entries in the iterator stack. +func (its weightedAccountIterators) Swap(i, j int) { + its[i], its[j] = its[j], its[i] +} + // fastAccountIterator is a more optimized multi-layer iterator which maintains a -// direct mapping of all iterators leading down to the bottom layer +// direct mapping of all iterators leading down to the bottom layer. type fastAccountIterator struct { - iterators []*weightedIterator + tree *Tree // Snapshot tree to reinitialize stale sub-iterators with + root common.Hash // Root hash to reinitialize stale sub-iterators through + + iterators weightedAccountIterators initiated bool fail error } -// newFastAccountIterator creates a new fastAccountIterator -func (dl *diffLayer) newFastAccountIterator() AccountIterator { - f := &fastAccountIterator{ - initiated: false, +// newFastAccountIterator creates a new hierarhical account iterator with one +// element per diff layer. The returned combo iterator can be used to walk over +// the entire snapshot diff stack simultaneously. +func newFastAccountIterator(tree *Tree, root common.Hash, seek common.Hash) (AccountIterator, error) { + snap := tree.Snapshot(root) + if snap == nil { + return nil, fmt.Errorf("unknown snapshot: %x", root) } - for i, it := range dl.iterators() { - f.iterators = append(f.iterators, &weightedIterator{it, -i}) + fi := &fastAccountIterator{ + tree: tree, + root: root, } - f.Seek(common.Hash{}) - return f + current := snap.(snapshot) + for depth := 0; current != nil; depth++ { + fi.iterators = append(fi.iterators, &weightedAccountIterator{ + it: current.AccountIterator(seek), + priority: depth, + }) + current = current.Parent() + } + fi.init() + return fi, nil } -// Len returns the number of active iterators -func (fi *fastAccountIterator) Len() int { - return len(fi.iterators) -} +// init walks over all the iterators and resolves any clashes between them, after +// which it prepares the stack for step-by-step iteration. 
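newFastAccountIterator assigns priorities purely by depth while walking the layer stack through Parent: the head diff layer gets 0, its parent 1, and so on down to the disk layer. Stripped of the tree lookup, the walk is just this (a sketch using the snapshot and weightedAccountIterator types from this package):

// layerIterators collects one seeked iterator per layer, top-down, so that
// depth doubles as priority: 0 is the youngest layer, the largest value is
// the disk layer at the bottom.
func layerIterators(head snapshot, seek common.Hash) []*weightedAccountIterator {
	var its []*weightedAccountIterator
	for depth, current := 0, head; current != nil; depth++ {
		its = append(its, &weightedAccountIterator{
			it:       current.AccountIterator(seek),
			priority: depth,
		})
		current = current.Parent()
	}
	return its
}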
+func (fi *fastAccountIterator) init() { + // Track which account hashes are iterators positioned on + var positioned = make(map[common.Hash]int) -// Less implements sort.Interface -func (fi *fastAccountIterator) Less(i, j int) bool { - a := fi.iterators[i].it.Key() - b := fi.iterators[j].it.Key() - bDiff := bytes.Compare(a[:], b[:]) - if bDiff < 0 { - return true - } - if bDiff > 0 { - return false - } - // keys are equal, sort by iterator priority - return fi.iterators[i].priority < fi.iterators[j].priority -} - -// Swap implements sort.Interface -func (fi *fastAccountIterator) Swap(i, j int) { - fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i] -} - -func (fi *fastAccountIterator) Seek(key common.Hash) { - // We need to apply this across all iterators - var seen = make(map[common.Hash]int) - - length := len(fi.iterators) + // Position all iterators and track how many remain live for i := 0; i < len(fi.iterators); i++ { - //for i, it := range fi.iterators { + // Retrieve the first element and if it clashes with a previous iterator, + // advance either the current one or the old one. Repeat until nothing is + // clashing any more. it := fi.iterators[i] - it.it.Seek(key) for { + // If the iterator is exhausted, drop it off the end if !it.it.Next() { - // To be removed - // swap it to the last position for now - fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] - length-- + it.it.Release() + last := len(fi.iterators) - 1 + + fi.iterators[i] = fi.iterators[last] + fi.iterators[last] = nil + fi.iterators = fi.iterators[:last] + + i-- break } - v := it.it.Key() - if other, exist := seen[v]; !exist { - seen[v] = i + // The iterator is still alive, check for collisions with previous ones + hash := it.it.Hash() + if other, exist := positioned[hash]; !exist { + positioned[hash] = i break } else { + // Iterators collide, one needs to be progressed, use priority to + // determine which. + // // This whole else-block can be avoided, if we instead // do an inital priority-sort of the iterators. If we do that, // then we'll only wind up here if a lower-priority (preferred) iterator // has the same value, and then we will always just continue. // However, it costs an extra sort, so it's probably not better - - // One needs to be progressed, use priority to determine which if fi.iterators[other].priority < it.priority { - // the 'it' should be progressed + // The 'it' should be progressed continue } else { - // the 'other' should be progressed - swap them + // The 'other' should be progressed, swap them it = fi.iterators[other] fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other] continue @@ -115,15 +146,12 @@ func (fi *fastAccountIterator) Seek(key common.Hash) { } } } - // Now remove those that were placed in the end - fi.iterators = fi.iterators[:length] - // The list is now totally unsorted, need to re-sort the entire list - sort.Sort(fi) + // Re-sort the entire list + sort.Sort(fi.iterators) fi.initiated = false } -// Next implements the Iterator interface. It returns false if no more elemnts -// can be retrieved (false == exhausted) +// Next steps the iterator forward one element, returning false if exhausted. 
func (fi *fastAccountIterator) Next() bool { if len(fi.iterators) == 0 { return false @@ -134,101 +162,88 @@ func (fi *fastAccountIterator) Next() bool { fi.initiated = true return true } - return fi.innerNext(0) + return fi.next(0) } -// innerNext handles the next operation internally, -// and should be invoked when we know that two elements in the list may have -// the same value. -// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke -// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue -// along the list and apply the same operation if needed -func (fi *fastAccountIterator) innerNext(pos int) bool { - if !fi.iterators[pos].it.Next() { - //Exhausted, remove this iterator - fi.remove(pos) - if len(fi.iterators) == 0 { - return false - } +// next handles the next operation internally and should be invoked when we know +// that two elements in the list may have the same value. +// +// For example, if the iterated hashes become [2,3,5,5,8,9,10], then we should +// invoke next(3), which will call Next on elem 3 (the second '5') and will +// cascade along the list, applying the same operation if needed. +func (fi *fastAccountIterator) next(idx int) bool { + // If this particular iterator got exhausted, remove it and return true (the + // next one is surely not exhausted yet, otherwise it would have been removed + // already). + if it := fi.iterators[idx].it; !it.Next() { + it.Release() + + fi.iterators = append(fi.iterators[:idx], fi.iterators[idx+1:]...) + return len(fi.iterators) > 0 + } + // If there's noone left to cascade into, return + if idx == len(fi.iterators)-1 { return true } - if pos == len(fi.iterators)-1 { - // Only one iterator left - return true - } - // We next:ed the elem at 'pos'. Now we may have to re-sort that elem + // We next-ed the iterator at 'idx', now we may have to re-sort that element var ( - current, neighbour = fi.iterators[pos], fi.iterators[pos+1] - val, neighbourVal = current.it.Key(), neighbour.it.Key() + cur, next = fi.iterators[idx], fi.iterators[idx+1] + curHash, nextHash = cur.it.Hash(), next.it.Hash() ) - if diff := bytes.Compare(val[:], neighbourVal[:]); diff < 0 { + if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 { // It is still in correct place return true - } else if diff == 0 && current.priority < neighbour.priority { - // So still in correct place, but we need to iterate on the neighbour - fi.innerNext(pos + 1) + } else if diff == 0 && cur.priority < next.priority { + // So still in correct place, but we need to iterate on the next + fi.next(idx + 1) return true } - // At this point, the elem is in the wrong location, but the - // remaining list is sorted. Find out where to move the elem - iteratee := -1 + // At this point, the iterator is in the wrong location, but the remaining + // list is sorted. Find out where to move the item. + clash := -1 index := sort.Search(len(fi.iterators), func(n int) bool { - if n < pos { - // No need to search 'behind' us + // The iterator always advances forward, so anything before the old slot + // is known to be behind us, so just skip them altogether. This actually + // is an important clause since the sort order got invalidated. 
+ if n < idx { return false } if n == len(fi.iterators)-1 { // Can always place an elem last return true } - neighbour := fi.iterators[n+1].it.Key() - if diff := bytes.Compare(val[:], neighbour[:]); diff < 0 { + nextHash := fi.iterators[n+1].it.Hash() + if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 { return true } else if diff > 0 { return false } // The elem we're placing it next to has the same value, // so whichever winds up on n+1 will need further iteraton - iteratee = n + 1 - if current.priority < fi.iterators[n+1].priority { + clash = n + 1 + if cur.priority < fi.iterators[n+1].priority { // We can drop the iterator here return true } // We need to move it one step further return false // TODO benchmark which is best, this works too: - //iteratee = n + //clash = n //return true // Doing so should finish the current search earlier }) - fi.move(pos, index) - if iteratee != -1 { - fi.innerNext(iteratee) + fi.move(idx, index) + if clash != -1 { + fi.next(clash) } return true } -// move moves an iterator to another position in the list +// move advances an iterator to another position in the list. func (fi *fastAccountIterator) move(index, newpos int) { - if newpos > len(fi.iterators)-1 { - newpos = len(fi.iterators) - 1 - } - var ( - elem = fi.iterators[index] - middle = fi.iterators[index+1 : newpos+1] - suffix []*weightedIterator - ) - if newpos < len(fi.iterators)-1 { - suffix = fi.iterators[newpos+1:] - } - fi.iterators = append(fi.iterators[:index], middle...) - fi.iterators = append(fi.iterators, elem) - fi.iterators = append(fi.iterators, suffix...) -} - -// remove drops an iterator from the list -func (fi *fastAccountIterator) remove(index int) { - fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...) + elem := fi.iterators[index] + copy(fi.iterators[index:], fi.iterators[index+1:newpos+1]) + fi.iterators[newpos] = elem } // Error returns any failure that occurred during iteration, which might have @@ -237,20 +252,29 @@ func (fi *fastAccountIterator) Error() error { return fi.fail } -// Key returns the current key -func (fi *fastAccountIterator) Key() common.Hash { - return fi.iterators[0].it.Key() +// Hash returns the current key +func (fi *fastAccountIterator) Hash() common.Hash { + return fi.iterators[0].it.Hash() } -// Value returns the current key -func (fi *fastAccountIterator) Value() []byte { - return fi.iterators[0].it.Value() +// Account returns the current key +func (fi *fastAccountIterator) Account() []byte { + return fi.iterators[0].it.Account() +} + +// Release iterates over all the remaining live layer iterators and releases each +// of thme individually. 
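After the head iterator advances, only that single entry can be out of order, so next relocates it with one binary search over the still-sorted remainder and one copy instead of re-sorting the whole slice. The relocation on its own behaves like this (a generic sketch over ints, ignoring the priority tie-breaking the real code also performs):

import "sort"

// relocate moves s[idx], which may have grown past its neighbours, to its
// correct place in an otherwise ascending slice -- the same trick
// fastAccountIterator.move relies on.
func relocate(s []int, idx int) {
	val := s[idx]
	// Find the first position at or after idx whose successor is >= val
	// (or the end of the slice).
	newpos := sort.Search(len(s), func(n int) bool {
		if n < idx {
			return false
		}
		if n == len(s)-1 {
			return true
		}
		return val <= s[n+1]
	})
	copy(s[idx:], s[idx+1:newpos+1])
	s[newpos] = val
}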
+func (fi *fastAccountIterator) Release() { + for _, it := range fi.iterators { + it.it.Release() + } + fi.iterators = nil } // Debug is a convencience helper during testing func (fi *fastAccountIterator) Debug() { for _, it := range fi.iterators { - fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Key()[0]) + fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Hash()[0]) } fmt.Println() } diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index 01e525653..902985cf6 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -23,7 +23,9 @@ import ( "math/rand" "testing" + "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" ) // TestIteratorBasics tests some simple single-layer iteration @@ -47,7 +49,7 @@ func TestIteratorBasics(t *testing.T) { } // Add some (identical) layers on top parent := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) - it := parent.newAccountIterator() + it := parent.AccountIterator(common.Hash{}) verifyIterator(t, 100, it) } @@ -75,14 +77,16 @@ func (ti *testIterator) Error() error { panic("implement me") } -func (ti *testIterator) Key() common.Hash { +func (ti *testIterator) Hash() common.Hash { return common.BytesToHash([]byte{ti.values[0]}) } -func (ti *testIterator) Value() []byte { +func (ti *testIterator) Account() []byte { panic("implement me") } +func (ti *testIterator) Release() {} + func TestFastIteratorBasics(t *testing.T) { type testCase struct { lists [][]byte @@ -96,10 +100,10 @@ func TestFastIteratorBasics(t *testing.T) { {9, 10}, {10, 13, 15, 16}}, expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, } { - var iterators []*weightedIterator + var iterators []*weightedAccountIterator for i, data := range tc.lists { it := newTestIterator(data...) - iterators = append(iterators, &weightedIterator{it, i}) + iterators = append(iterators, &weightedAccountIterator{it, i}) } fi := &fastAccountIterator{ @@ -108,7 +112,7 @@ func TestFastIteratorBasics(t *testing.T) { } count := 0 for fi.Next() { - if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got { + if got, exp := fi.Hash()[31], tc.expKeys[count]; exp != got { t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) } count++ @@ -117,68 +121,86 @@ func TestFastIteratorBasics(t *testing.T) { } func verifyIterator(t *testing.T, expCount int, it AccountIterator) { + t.Helper() + var ( - i = 0 - last = common.Hash{} + count = 0 + last = common.Hash{} ) for it.Next() { - v := it.Key() - if bytes.Compare(last[:], v[:]) >= 0 { - t.Errorf("Wrong order:\n%x \n>=\n%x", last, v) + if hash := it.Hash(); bytes.Compare(last[:], hash[:]) >= 0 { + t.Errorf("wrong order: %x >= %x", last, hash) } - i++ + count++ } - if i != expCount { - t.Errorf("iterator len wrong, expected %d, got %d", expCount, i) + if count != expCount { + t.Errorf("iterator count mismatch: have %d, want %d", count, expCount) + } + if err := it.Error(); err != nil { + t.Errorf("iterator failed: %v", err) } } -// TestIteratorTraversal tests some simple multi-layer iteration +// TestIteratorTraversal tests some simple multi-layer iteration. 
func TestIteratorTraversal(t *testing.T) { - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts + // Create an empty base layer and a snapshot tree out of it + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } - // entries in multiple layers should only become output once - parent := newDiffLayer(emptyLayer(), common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Stack three diff layers on top with various overlaps + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil) - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + randomAccountSet("0xbb", "0xdd", "0xf0"), nil) - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + randomAccountSet("0xcc", "0xf0", "0xff"), nil) - // single layer iterator - verifyIterator(t, 3, child.newAccountIterator()) - // multi-layered binary iterator - verifyIterator(t, 7, child.newBinaryAccountIterator()) - // multi-layered fast iterator - verifyIterator(t, 7, child.newFastAccountIterator()) + // Verify the single and multi-layer iterators + head := snaps.Snapshot(common.HexToHash("0x04")) + + verifyIterator(t, 3, head.(snapshot).AccountIterator(common.Hash{})) + verifyIterator(t, 7, head.(*diffLayer).newBinaryAccountIterator()) + + it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{}) + defer it.Release() + + verifyIterator(t, 7, it) } // TestIteratorTraversalValues tests some multi-layer iteration, where we -// also expect the correct values to show up +// also expect the correct values to show up. 
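The expected counts above (3 accounts when iterating the top layer alone, 7 across all three diff layers) follow from de-duplicating the overlapping account sets; a tiny worked check:

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	// The three diff layers stacked in the test above (hash prefixes only).
	layers := [][]string{
		{"aa", "ee", "ff", "f0"},
		{"bb", "dd", "f0"},
		{"cc", "f0", "ff"},
	}
	// A multi-layer iterator must emit each account once, in hash order,
	// which is why the test expects 7 entries rather than 4+3+3 = 10.
	unique := make(map[string]struct{})
	for _, layer := range layers {
		for _, acc := range layer {
			unique[acc] = struct{}{}
		}
	}
	var keys []string
	for acc := range unique {
		keys = append(keys, acc)
	}
	sort.Strings(keys)
	fmt.Println(len(keys), keys) // 7 [aa bb cc dd ee f0 ff]
}
```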
func TestIteratorTraversalValues(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Create a batch of account sets to seed subsequent layers with var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - a = make(map[common.Hash][]byte) - b = make(map[common.Hash][]byte) - c = make(map[common.Hash][]byte) - d = make(map[common.Hash][]byte) - e = make(map[common.Hash][]byte) - f = make(map[common.Hash][]byte) - g = make(map[common.Hash][]byte) - h = make(map[common.Hash][]byte) + a = make(map[common.Hash][]byte) + b = make(map[common.Hash][]byte) + c = make(map[common.Hash][]byte) + d = make(map[common.Hash][]byte) + e = make(map[common.Hash][]byte) + f = make(map[common.Hash][]byte) + g = make(map[common.Hash][]byte) + h = make(map[common.Hash][]byte) ) - // entries in multiple layers should only become output once for i := byte(2); i < 0xff; i++ { a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i)) if i > 20 && i%2 == 0 { @@ -203,35 +225,36 @@ func TestIteratorTraversalValues(t *testing.T) { h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i)) } } - child := newDiffLayer(emptyLayer(), common.Hash{}, a, storage). - Update(common.Hash{}, b, storage). - Update(common.Hash{}, c, storage). - Update(common.Hash{}, d, storage). - Update(common.Hash{}, e, storage). - Update(common.Hash{}, f, storage). - Update(common.Hash{}, g, storage). - Update(common.Hash{}, h, storage) + // Assemble a stack of snapshots from the account layers + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), a, nil) + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), b, nil) + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), c, nil) + snaps.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), d, nil) + snaps.Update(common.HexToHash("0x06"), common.HexToHash("0x05"), e, nil) + snaps.Update(common.HexToHash("0x07"), common.HexToHash("0x06"), f, nil) + snaps.Update(common.HexToHash("0x08"), common.HexToHash("0x07"), g, nil) + snaps.Update(common.HexToHash("0x09"), common.HexToHash("0x08"), h, nil) - it := child.newFastAccountIterator() + it, _ := snaps.AccountIterator(common.HexToHash("0x09"), common.Hash{}) + defer it.Release() + + head := snaps.Snapshot(common.HexToHash("0x09")) for it.Next() { - key := it.Key() - exp, err := child.accountRLP(key, 0) + hash := it.Hash() + want, err := head.AccountRLP(hash) if err != nil { - t.Fatal(err) + t.Fatalf("failed to retrieve expected account: %v", err) } - got := it.Value() - if !bytes.Equal(exp, got) { - t.Fatalf("Error on key %x, got %v exp %v", key, string(got), string(exp)) + if have := it.Account(); !bytes.Equal(want, have) { + t.Fatalf("hash %x: account mismatch: have %x, want %x", hash, have, want) } - //fmt.Printf("val: %v\n", string(it.Value())) } } +// This testcase is notorious, all layers contain the exact same 200 accounts. func TestIteratorLargeTraversal(t *testing.T) { - // This testcase is a bit notorious -- all layers contain the exact - // same 200 accounts. 
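The values test relies on the usual layering rule: when the same account hash is written in several diff layers, the topmost write wins, and the iterator's Account() must agree with a top-down lookup through the stack. A small sketch of that resolution order (plain strings stand in for hashes and RLP blobs):

```go
package main

import "fmt"

func main() {
	// Layered writes: later (higher) layers override earlier ones.
	layers := []map[string]string{
		{"0x02": "layer-0, key 2", "0x03": "layer-0, key 3"},
		{"0x03": "layer-1, key 3"},                           // overrides key 3
		{"0x02": "layer-2, key 2", "0x04": "layer-2, key 4"}, // overrides key 2
	}
	lookup := func(key string) string {
		for i := len(layers) - 1; i >= 0; i-- { // top-down resolution
			if v, ok := layers[i][key]; ok {
				return v
			}
		}
		return ""
	}
	for _, k := range []string{"0x02", "0x03", "0x04"} {
		fmt.Printf("%s -> %s\n", k, lookup(k))
	}
	// 0x02 -> layer-2, key 2
	// 0x03 -> layer-1, key 3
	// 0x04 -> layer-2, key 4
}
```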
- var storage = make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(num int) map[common.Hash][]byte { + // Create a custom account factory to recreate the same addresses + makeAccounts := func(num int) map[common.Hash][]byte { accounts := make(map[common.Hash][]byte) for i := 0; i < num; i++ { h := common.Hash{} @@ -240,25 +263,121 @@ func TestIteratorLargeTraversal(t *testing.T) { } return accounts } - parent := newDiffLayer(emptyLayer(), common.Hash{}, - mkAccounts(200), storage) - child := parent.Update(common.Hash{}, - mkAccounts(200), storage) - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(200), storage) + // Build up a large stack of snapshots + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), } - // single layer iterator - verifyIterator(t, 200, child.newAccountIterator()) - // multi-layered binary iterator - verifyIterator(t, 200, child.newBinaryAccountIterator()) - // multi-layered fast iterator - verifyIterator(t, 200, child.newFastAccountIterator()) + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + for i := 1; i < 128; i++ { + snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), makeAccounts(200), nil) + } + // Iterate the entire stack and ensure everything is hit only once + head := snaps.Snapshot(common.HexToHash("0x80")) + verifyIterator(t, 200, head.(snapshot).AccountIterator(common.Hash{})) + verifyIterator(t, 200, head.(*diffLayer).newBinaryAccountIterator()) + + it, _ := snaps.AccountIterator(common.HexToHash("0x80"), common.Hash{}) + defer it.Release() + + verifyIterator(t, 200, it) } -// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the exact -// same 200 accounts. That means that we need to process 2000 items, but only -// spit out 200 values eventually. +// TestIteratorFlattening tests what happens when we +// - have a live iterator on child C (parent C1 -> C2 .. 
CN) +// - flattens C2 all the way into CN +// - continues iterating +func TestIteratorFlattening(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Create a stack of diffs on top + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil) + + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + randomAccountSet("0xbb", "0xdd", "0xf0"), nil) + + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + randomAccountSet("0xcc", "0xf0", "0xff"), nil) + + // Create an iterator and flatten the data from underneath it + it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{}) + defer it.Release() + + if err := snaps.Cap(common.HexToHash("0x04"), 1); err != nil { + t.Fatalf("failed to flatten snapshot stack: %v", err) + } + //verifyIterator(t, 7, it) +} + +func TestIteratorSeek(t *testing.T) { + // Create a snapshot stack with some initial data + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil) + + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + randomAccountSet("0xbb", "0xdd", "0xf0"), nil) + + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + randomAccountSet("0xcc", "0xf0", "0xff"), nil) + + // Construct various iterators and ensure their tranversal is correct + it, _ := snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xdd")) + defer it.Release() + verifyIterator(t, 3, it) // expected: ee, f0, ff + + it, _ = snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xaa")) + defer it.Release() + verifyIterator(t, 3, it) // expected: ee, f0, ff + + it, _ = snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xff")) + defer it.Release() + verifyIterator(t, 0, it) // expected: nothing + + it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xbb")) + defer it.Release() + verifyIterator(t, 5, it) // expected: cc, dd, ee, f0, ff + + it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xef")) + defer it.Release() + verifyIterator(t, 2, it) // expected: f0, ff + + it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xf0")) + defer it.Release() + verifyIterator(t, 1, it) // expected: ff + + it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xff")) + defer it.Release() + verifyIterator(t, 0, it) // expected: nothing +} + +// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the +// exact same 200 accounts. That means that we need to process 2000 items, but +// only spit out 200 values eventually. 
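The expected counts in the seek test fall out of binary-searching each layer's sorted account list for the first hash past the seek position. A standalone sketch reproducing the layer 0x02 expectations (hex strings stand in for 32-byte hashes, and the seek position itself is treated as exclusive, matching the comments above):

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	// Sorted account hashes of layer 0x02 in the test above.
	accounts := []string{"aa", "ee", "f0", "ff"}

	seek := func(pos string) []string {
		idx := sort.SearchStrings(accounts, pos) // first hash >= pos
		if idx < len(accounts) && accounts[idx] == pos {
			idx++ // the exact seek position is skipped in the expectations
		}
		return accounts[idx:]
	}
	fmt.Println(seek("dd")) // [ee f0 ff] -> 3 entries
	fmt.Println(seek("aa")) // [ee f0 ff] -> 3 entries
	fmt.Println(seek("ff")) // []         -> 0 entries
}
```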
// // The value-fetching benchmark is easy on the binary iterator, since it never has to reach // down at any depth for retrieving the values -- all are on the toppmost layer @@ -267,12 +386,9 @@ func TestIteratorLargeTraversal(t *testing.T) { // BenchmarkIteratorTraversal/binary_iterator_values-6 2403 501810 ns/op // BenchmarkIteratorTraversal/fast_iterator_keys-6 1923 677966 ns/op // BenchmarkIteratorTraversal/fast_iterator_values-6 1741 649967 ns/op -// func BenchmarkIteratorTraversal(b *testing.B) { - - var storage = make(map[common.Hash]map[common.Hash][]byte) - - mkAccounts := func(num int) map[common.Hash][]byte { + // Create a custom account factory to recreate the same addresses + makeAccounts := func(num int) map[common.Hash][]byte { accounts := make(map[common.Hash][]byte) for i := 0; i < num; i++ { h := common.Hash{} @@ -281,24 +397,29 @@ func BenchmarkIteratorTraversal(b *testing.B) { } return accounts } - parent := newDiffLayer(emptyLayer(), common.Hash{}, - mkAccounts(200), storage) - - child := parent.Update(common.Hash{}, - mkAccounts(200), storage) - - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(200), storage) - + // Build up a large stack of snapshots + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + for i := 1; i <= 100; i++ { + snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), makeAccounts(200), nil) } // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. - child.newBinaryAccountIterator() + head := snaps.Snapshot(common.HexToHash("0x65")) + head.(*diffLayer).newBinaryAccountIterator() + b.Run("binary iterator keys", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 - it := child.newBinaryAccountIterator() + it := head.(*diffLayer).newBinaryAccountIterator() for it.Next() { got++ } @@ -310,10 +431,10 @@ func BenchmarkIteratorTraversal(b *testing.B) { b.Run("binary iterator values", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 - it := child.newBinaryAccountIterator() + it := head.(*diffLayer).newBinaryAccountIterator() for it.Next() { got++ - child.accountRLP(it.Key(), 0) + head.(*diffLayer).accountRLP(it.Hash(), 0) } if exp := 200; got != exp { b.Errorf("iterator len wrong, expected %d, got %d", exp, got) @@ -322,8 +443,10 @@ func BenchmarkIteratorTraversal(b *testing.B) { }) b.Run("fast iterator keys", func(b *testing.B) { for i := 0; i < b.N; i++ { + it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{}) + defer it.Release() + got := 0 - it := child.newFastAccountIterator() for it.Next() { got++ } @@ -334,11 +457,13 @@ func BenchmarkIteratorTraversal(b *testing.B) { }) b.Run("fast iterator values", func(b *testing.B) { for i := 0; i < b.N; i++ { + it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{}) + defer it.Release() + got := 0 - it := child.newFastAccountIterator() for it.Next() { got++ - it.Value() + it.Account() } if exp := 200; got != exp { b.Errorf("iterator len wrong, expected %d, got %d", exp, got) @@ -354,13 +479,12 @@ func BenchmarkIteratorTraversal(b *testing.B) { // call recursively 100 times for the majority of the values // // BenchmarkIteratorLargeBaselayer/binary_iterator_(keys)-6 514 1971999 ns/op -// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op // 
BenchmarkIteratorLargeBaselayer/binary_iterator_(values)-6 61 18997492 ns/op +// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op // BenchmarkIteratorLargeBaselayer/fast_iterator_(values)-6 4047 296823 ns/op func BenchmarkIteratorLargeBaselayer(b *testing.B) { - var storage = make(map[common.Hash]map[common.Hash][]byte) - - mkAccounts := func(num int) map[common.Hash][]byte { + // Create a custom account factory to recreate the same addresses + makeAccounts := func(num int) map[common.Hash][]byte { accounts := make(map[common.Hash][]byte) for i := 0; i < num; i++ { h := common.Hash{} @@ -369,37 +493,30 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { } return accounts } - - parent := newDiffLayer(emptyLayer(), common.Hash{}, - mkAccounts(2000), storage) - - child := parent.Update(common.Hash{}, - mkAccounts(20), storage) - - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(20), storage) - + // Build up a large stack of snapshots + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), makeAccounts(2000), nil) + for i := 2; i <= 100; i++ { + snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), makeAccounts(20), nil) } // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. - child.newBinaryAccountIterator() + head := snaps.Snapshot(common.HexToHash("0x65")) + head.(*diffLayer).newBinaryAccountIterator() + b.Run("binary iterator (keys)", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 - it := child.newBinaryAccountIterator() - for it.Next() { - got++ - } - if exp := 2000; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) - b.Run("fast iterator (keys)", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newFastAccountIterator() + it := head.(*diffLayer).newBinaryAccountIterator() for it.Next() { got++ } @@ -411,24 +528,39 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { b.Run("binary iterator (values)", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 - it := child.newBinaryAccountIterator() + it := head.(*diffLayer).newBinaryAccountIterator() for it.Next() { got++ - v := it.Key() - child.accountRLP(v, -0) + v := it.Hash() + head.(*diffLayer).accountRLP(v, 0) } if exp := 2000; got != exp { b.Errorf("iterator len wrong, expected %d, got %d", exp, got) } } }) + b.Run("fast iterator (keys)", func(b *testing.B) { + for i := 0; i < b.N; i++ { + it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{}) + defer it.Release() + got := 0 + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) b.Run("fast iterator (values)", func(b *testing.B) { for i := 0; i < b.N; i++ { + it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{}) + defer it.Release() + got := 0 - it := child.newFastAccountIterator() for it.Next() { - it.Value() + it.Account() got++ } if exp := 2000; got != exp { @@ -438,117 +570,38 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { }) } -// TestIteratorFlatting tests what happens when we -// - have a live iterator on child C (parent C1 -> C2 .. 
CN) -// - flattens C2 all the way into CN -// - continues iterating -// Right now, this "works" simply because the keys do not change -- the -// iterator is not aware that a layer has become stale. This naive -// solution probably won't work in the long run, however -func TestIteratorFlattning(t *testing.T) { - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - // entries in multiple layers should only become output once - parent := newDiffLayer(emptyLayer(), common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - it := child.newFastAccountIterator() - child.parent.(*diffLayer).flatten() - // The parent should now be stale - verifyIterator(t, 7, it) -} - -func TestIteratorSeek(t *testing.T) { - storage := make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer(), common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - it := AccountIterator(parent.newAccountIterator()) - // expected: ee, f0, ff - it.Seek(common.HexToHash("0xdd")) - verifyIterator(t, 3, it) - - it = parent.newAccountIterator() - // expected: ee, f0, ff - it.Seek(common.HexToHash("0xaa")) - verifyIterator(t, 3, it) - - it = parent.newAccountIterator() - // expected: nothing - it.Seek(common.HexToHash("0xff")) - verifyIterator(t, 0, it) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - it = child.newFastAccountIterator() - // expected: cc, dd, ee, f0, ff - it.Seek(common.HexToHash("0xbb")) - verifyIterator(t, 5, it) - - it = child.newFastAccountIterator() - it.Seek(common.HexToHash("0xef")) - // exp: f0, ff - verifyIterator(t, 2, it) - - it = child.newFastAccountIterator() - it.Seek(common.HexToHash("0xf0")) - verifyIterator(t, 1, it) - - it.Seek(common.HexToHash("0xff")) - verifyIterator(t, 0, it) -} - -//BenchmarkIteratorSeek/init+seek-6 4328 245477 ns/op -func BenchmarkIteratorSeek(b *testing.B) { - - var storage = make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - layer := newDiffLayer(emptyLayer(), common.Hash{}, mkAccounts(200), storage) - for i := 1; i < 100; i++ { - layer = layer.Update(common.Hash{}, - mkAccounts(200), storage) - } - b.Run("init+seek", func(b *testing.B) { - b.ResetTimer() - seekpos := make([]byte, 20) - for i := 0; i < b.N; i++ { - b.StopTimer() - rand.Read(seekpos) - it := layer.newFastAccountIterator() - b.StartTimer() - it.Seek(common.BytesToHash(seekpos)) - } +/* +func BenchmarkBinaryAccountIteration(b *testing.B) { + benchmarkAccountIteration(b, func(snap snapshot) AccountIterator { + return snap.(*diffLayer).newBinaryAccountIterator() }) } + +func 
BenchmarkFastAccountIteration(b *testing.B) { + benchmarkAccountIteration(b, newFastAccountIterator) +} + +func benchmarkAccountIteration(b *testing.B, iterator func(snap snapshot) AccountIterator) { + // Create a diff stack and randomize the accounts across them + layers := make([]map[common.Hash][]byte, 128) + for i := 0; i < len(layers); i++ { + layers[i] = make(map[common.Hash][]byte) + } + for i := 0; i < b.N; i++ { + depth := rand.Intn(len(layers)) + layers[depth][randomHash()] = randomAccount() + } + stack := snapshot(emptyLayer()) + for _, layer := range layers { + stack = stack.Update(common.Hash{}, layer, nil) + } + // Reset the timers and report all the stats + it := iterator(stack) + + b.ResetTimer() + b.ReportAllocs() + + for it.Next() { + } +} +*/ diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 7650cf2c1..5f9a8be63 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -113,9 +113,17 @@ type Snapshot interface { type snapshot interface { Snapshot + // Parent returns the subsequent layer of a snapshot, or nil if the base was + // reached. + // + // Note, the method is an internal helper to avoid type switching between the + // disk and diff layers. There is no locking involved. + Parent() snapshot + // Update creates a new layer on top of the existing snapshot diff tree with - // the specified data items. Note, the maps are retained by the method to avoid - // copying everything. + // the specified data items. + // + // Note, the maps are retained by the method to avoid copying everything. Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer // Journal commits an entire diff hierarchy to disk into a single journal entry. @@ -126,6 +134,9 @@ type snapshot interface { // Stale return whether this layer has become stale (was flattened across) or // if it's still live. Stale() bool + + // AccountIterator creates an account iterator over an arbitrary layer. + AccountIterator(seek common.Hash) AccountIterator } // SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent @@ -170,15 +181,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm // Existing snapshot loaded, seed all the layers for head != nil { snap.layers[head.Root()] = head - - switch self := head.(type) { - case *diffLayer: - head = self.parent - case *diskLayer: - head = nil - default: - panic(fmt.Sprintf("unknown data layer: %T", self)) - } + head = head.Parent() } return snap } @@ -563,3 +566,9 @@ func (t *Tree) Rebuild(root common.Hash) { root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper), } } + +// AccountIterator creates a new account iterator for the specified root hash and +// seeks to a starting account hash. +func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { + return newFastAccountIterator(t, root, seek) +} diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 44b8f3cef..2b1482817 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -18,13 +18,48 @@ package snapshot import ( "fmt" + "math/big" + "math/rand" "testing" "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/rlp" ) +// randomHash generates a random blob of data and returns it as a hash. 
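The new Parent helper lets the constructor seed its layer index with a plain loop instead of a type switch on disk versus diff layers. A minimal sketch of that walk with stand-in types (not the real snapshot interfaces):

```go
package main

import "fmt"

// layer mimics the Parent() traversal the rebuilt New() uses: walk from the
// head diff layer down to the disk layer (whose Parent() is nil), registering
// each layer by root. Sketch types only.
type layer struct {
	root   string
	parent *layer
}

func (l *layer) Parent() *layer { return l.parent }

func main() {
	disk := &layer{root: "0x01"}
	diff1 := &layer{root: "0x02", parent: disk}
	head := &layer{root: "0x03", parent: diff1}

	layers := make(map[string]*layer)
	for l := head; l != nil; l = l.Parent() {
		layers[l.root] = l
	}
	fmt.Println(len(layers)) // 3
}
```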
+func randomHash() common.Hash { + var hash common.Hash + if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil { + panic(err) + } + return hash +} + +// randomAccount generates a random account and returns it RLP encoded. +func randomAccount() []byte { + root := randomHash() + a := Account{ + Balance: big.NewInt(rand.Int63()), + Nonce: rand.Uint64(), + Root: root[:], + CodeHash: emptyCode[:], + } + data, _ := rlp.EncodeToBytes(a) + return data +} + +// randomAccountSet generates a set of random accounts with the given strings as +// the account address hashes. +func randomAccountSet(hashes ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, hash := range hashes { + accounts[common.HexToHash(hash)] = randomAccount() + } + return accounts +} + // Tests that if a disk layer becomes stale, no active external references will // be returned with junk data. This version of the test flattens every diff layer // to check internal corner case around the bottom-most memory accumulator. @@ -46,8 +81,7 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - storage := make(map[common.Hash]map[common.Hash][]byte) - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 2 { @@ -91,11 +125,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - storage := make(map[common.Hash]map[common.Hash][]byte) - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 3 { @@ -140,11 +173,10 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - storage := make(map[common.Hash]map[common.Hash][]byte) - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 3 { @@ -188,14 +220,13 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - storage := make(map[common.Hash]map[common.Hash][]byte) - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil { + if err := 
snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), accounts, storage); err != nil { + if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 4 { diff --git a/core/state/snapshot/wipe_test.go b/core/state/snapshot/wipe_test.go index f12769a95..cb6e174b3 100644 --- a/core/state/snapshot/wipe_test.go +++ b/core/state/snapshot/wipe_test.go @@ -25,15 +25,6 @@ import ( "github.com/ethereum/go-ethereum/ethdb/memorydb" ) -// randomHash generates a random blob of data and returns it as a hash. -func randomHash() common.Hash { - var hash common.Hash - if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil { - panic(err) - } - return hash -} - // Tests that given a database with random data content, all parts of a snapshot // can be crrectly wiped without touching anything else. func TestWipe(t *testing.T) { From 19099421dc9d1c0818002fa7de948e056e1eee61 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Sun, 19 Jan 2020 20:57:56 +0100 Subject: [PATCH 16/28] core/state/snapshot: faster account iteration, CLI integration --- cmd/geth/chaincmd.go | 1 + cmd/geth/main.go | 1 + cmd/utils/flags.go | 10 +++++++++ core/blockchain.go | 19 ++++++++++------ core/state/snapshot/difflayer.go | 16 +++++--------- core/state/snapshot/iterator.go | 33 ++++++++++++++++------------ core/state/snapshot/iterator_fast.go | 22 ++++++++++++++----- core/state/snapshot/journal.go | 2 +- core/state/snapshot/snapshot.go | 3 ++- 9 files changed, 69 insertions(+), 38 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 9d4835a16..c5ae550e3 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -79,6 +79,7 @@ The dumpgenesis command dumps the genesis block configuration in JSON format to utils.CacheFlag, utils.SyncModeFlag, utils.GCModeFlag, + utils.SnapshotFlag, utils.CacheDatabaseFlag, utils.CacheGCFlag, }, diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 36187e484..3615a9166 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -91,6 +91,7 @@ var ( utils.SyncModeFlag, utils.ExitWhenSyncedFlag, utils.GCModeFlag, + utils.SnapshotFlag, utils.LightServeFlag, utils.LightLegacyServFlag, utils.LightIngressFlag, diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 22fe677fa..cbbe53070 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -225,6 +225,10 @@ var ( Usage: `Blockchain garbage collection mode ("full", "archive")`, Value: "full", } + SnapshotFlag = cli.BoolFlag{ + Name: "snapshot", + Usage: `Enables snapshot-database mode -- experimental work in progress feature`, + } LightKDFFlag = cli.BoolFlag{ Name: "lightkdf", Usage: "Reduce key-derivation RAM & CPU usage at some expense of KDF strength", @@ -1471,6 +1475,9 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *eth.Config) { if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheSnapshotFlag.Name) { cfg.SnapshotCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheSnapshotFlag.Name) / 100 } + if 
!ctx.GlobalIsSet(SnapshotFlag.Name) { + cfg.SnapshotCache = 0 // Disabled + } if ctx.GlobalIsSet(DocRootFlag.Name) { cfg.DocRoot = ctx.GlobalString(DocRootFlag.Name) } @@ -1734,6 +1741,9 @@ func MakeChain(ctx *cli.Context, stack *node.Node) (chain *core.BlockChain, chai TrieTimeLimit: eth.DefaultConfig.TrieTimeout, SnapshotLimit: eth.DefaultConfig.SnapshotCache, } + if !ctx.GlobalIsSet(SnapshotFlag.Name) { + cache.SnapshotLimit = 0 // Disabled + } if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheTrieFlag.Name) { cache.TrieCleanLimit = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheTrieFlag.Name) / 100 } diff --git a/core/blockchain.go b/core/blockchain.go index f868f7301..491eccecd 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -302,8 +302,9 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } } // Load any existing snapshot, regenerating it if loading failed - bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root()) - + if bc.cacheConfig.SnapshotLimit > 0 { + bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root()) + } // Take ownership of this particular state go bc.update() return bc, nil @@ -498,8 +499,9 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error { bc.chainmu.Unlock() // Destroy any existing state snapshot and regenerate it in the background - bc.snaps.Rebuild(block.Root()) - + if bc.snaps != nil { + bc.snaps.Rebuild(block.Root()) + } log.Info("Committed new head block", "number", block.Number(), "hash", hash) return nil } @@ -854,9 +856,12 @@ func (bc *BlockChain) Stop() { bc.wg.Wait() // Ensure that the entirety of the state snapshot is journalled to disk. - snapBase, err := bc.snaps.Journal(bc.CurrentBlock().Root()) - if err != nil { - log.Error("Failed to journal state snapshot", "err", err) + var snapBase common.Hash + if bc.snaps != nil { + var err error + if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } } // Ensure the state of a recent block is also stored to disk before exiting. // We're writing three different states to catch different restart scenarios: diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 855d862de..3528a04a2 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -23,6 +23,7 @@ import ( "math/rand" "sort" "sync" + "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" @@ -92,7 +93,7 @@ type diffLayer struct { memory uint64 // Approximate guess as to how much memory we use root common.Hash // Root hash to which this snapshot diff belongs to - stale bool // Signals that the layer became stale (state progressed) + stale uint32 // Signals that the layer became stale (state progressed) accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) @@ -237,10 +238,7 @@ func (dl *diffLayer) Parent() snapshot { // Stale return whether this layer has become stale (was flattened across) or if // it's still live. 
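When --snapshot is given, the snapshot cache allowance is carved out of the overall --cache budget as a percentage, per the expression above; a quick worked example (the numbers are illustrative placeholders, not the defaults):

```go
package main

import "fmt"

func main() {
	// cfg.SnapshotCache = cache * cache.snapshot / 100, as wired up above.
	cacheMB := 4096   // hypothetical --cache value in MB
	snapshotPct := 10 // hypothetical --cache.snapshot percentage share
	fmt.Println(cacheMB*snapshotPct/100, "MB for snapshots") // 409 MB for snapshots
}
```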
func (dl *diffLayer) Stale() bool { - dl.lock.RLock() - defer dl.lock.RUnlock() - - return dl.stale + return atomic.LoadUint32(&dl.stale) != 0 } // Account directly retrieves the account associated with a particular hash in @@ -288,7 +286,7 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) { // If the layer was flattened into, consider it invalid (any live reference to // the original should be marked as unusable). - if dl.stale { + if dl.Stale() { return nil, ErrSnapshotStale } // If the account is known locally, return it. Note, a nil account means it was @@ -342,7 +340,7 @@ func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([ // If the layer was flattened into, consider it invalid (any live reference to // the original should be marked as unusable). - if dl.stale { + if dl.Stale() { return nil, ErrSnapshotStale } // If the account is known locally, try to resolve the slot locally. Note, a nil @@ -401,11 +399,9 @@ func (dl *diffLayer) flatten() snapshot { // Before actually writing all our data to the parent, first ensure that the // parent hasn't been 'corrupted' by someone else already flattening into it - if parent.stale { + if atomic.SwapUint32(&parent.stale, 1) != 0 { panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo } - parent.stale = true - // Overwrite all the updated accounts blindly, merge the sorted list for hash, data := range dl.accountData { parent.accountData[hash] = data diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go index 4005cb3ca..774e9f554 100644 --- a/core/state/snapshot/iterator.go +++ b/core/state/snapshot/iterator.go @@ -64,7 +64,7 @@ type diffAccountIterator struct { // is explicitly tracked since the referenced diff layer might go stale after // the iterator was positioned and we don't want to fail accessing the old // value as long as the iterator is not touched any more. - curAccount []byte + //curAccount []byte layer *diffLayer // Live layer to retrieve values from keys []common.Hash // Keys left in the layer to iterate @@ -98,22 +98,13 @@ func (it *diffAccountIterator) Next() bool { if len(it.keys) == 0 { return false } - // Iterator seems to be still alive, retrieve and cache the live hash and - // account value, or fail now if layer became stale - it.layer.lock.RLock() - defer it.layer.lock.RUnlock() - - if it.layer.stale { + if it.layer.Stale() { it.fail, it.keys = ErrSnapshotStale, nil return false } + // Iterator seems to be still alive, retrieve and cache the live hash it.curHash = it.keys[0] - if blob, ok := it.layer.accountData[it.curHash]; !ok { - panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash)) - } else { - it.curAccount = blob - } - // Values cached, shift the iterator and notify the user of success + // key cached, shift the iterator and notify the user of success it.keys = it.keys[1:] return true } @@ -130,8 +121,22 @@ func (it *diffAccountIterator) Hash() common.Hash { } // Account returns the RLP encoded slim account the iterator is currently at. +// This method may _fail_, if the underlying layer has been flattened between +// the call to Next and Acccount. That type of error will set it.Err. +// This method assumes that flattening does not delete elements from +// the accountdata mapping (writing nil into it is fine though), and will panic +// if elements have been deleted. 
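Turning stale into a uint32 means the flattening race can be settled with a single atomic swap: the first flattener flips 0 -> 1 and proceeds, while a second attempt observes 1 and (in the real code) panics. A self-contained sketch of that pattern:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var (
		stale uint32 // the flag guarding a single flatten of the parent
		wins  int32  // how many goroutines "won" the swap
		wg    sync.WaitGroup
	)
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if atomic.SwapUint32(&stale, 1) == 0 {
				atomic.AddInt32(&wins, 1) // only one goroutine ever gets here
			}
		}()
	}
	wg.Wait()
	fmt.Println(atomic.LoadUint32(&stale), atomic.LoadInt32(&wins)) // 1 1
}
```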
func (it *diffAccountIterator) Account() []byte { - return it.curAccount + it.layer.lock.RLock() + blob, ok := it.layer.accountData[it.curHash] + if !ok { + panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash)) + } + it.layer.lock.RUnlock() + if it.layer.Stale() { + it.fail, it.keys = ErrSnapshotStale, nil + } + return blob } // Release is a noop for diff account iterators as there are no held resources. diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index 676a3af17..b5ffab7c8 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -63,8 +63,9 @@ func (its weightedAccountIterators) Swap(i, j int) { // fastAccountIterator is a more optimized multi-layer iterator which maintains a // direct mapping of all iterators leading down to the bottom layer. type fastAccountIterator struct { - tree *Tree // Snapshot tree to reinitialize stale sub-iterators with - root common.Hash // Root hash to reinitialize stale sub-iterators through + tree *Tree // Snapshot tree to reinitialize stale sub-iterators with + root common.Hash // Root hash to reinitialize stale sub-iterators through + curAccount []byte iterators weightedAccountIterators initiated bool @@ -160,9 +161,20 @@ func (fi *fastAccountIterator) Next() bool { // Don't forward first time -- we had to 'Next' once in order to // do the sorting already fi.initiated = true - return true + fi.curAccount = fi.iterators[0].it.Account() + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + } + return fi.Error() == nil } - return fi.next(0) + if !fi.next(0) { + return false + } + fi.curAccount = fi.iterators[0].it.Account() + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + } + return fi.Error() == nil } // next handles the next operation internally and should be invoked when we know @@ -259,7 +271,7 @@ func (fi *fastAccountIterator) Hash() common.Hash { // Account returns the current key func (fi *fastAccountIterator) Account() []byte { - return fi.iterators[0].it.Account() + return fi.curAccount } // Release iterates over all the remaining live layer iterators and releases each diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 1c36e0623..01b88bae6 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -210,7 +210,7 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { dl.lock.RLock() defer dl.lock.RUnlock() - if dl.stale { + if dl.Stale() { return common.Hash{}, ErrSnapshotStale } // Everything below was journalled, persist this layer too diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 5f9a8be63..ad602bbbb 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -22,6 +22,7 @@ import ( "errors" "fmt" "sync" + "sync/atomic" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" @@ -552,7 +553,7 @@ func (t *Tree) Rebuild(root common.Hash) { case *diffLayer: // If the layer is a simple diff, simply mark as stale layer.lock.Lock() - layer.stale = true + atomic.StoreUint32(&layer.stale, 1) layer.lock.Unlock() default: From 06d4470b4146a4e2ec813a01cc101aeaff6ce1eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Mon, 24 Feb 2020 13:26:34 +0200 Subject: [PATCH 17/28] core: fix broken tests due to API changes + linter --- core/blockchain_test.go | 2 +- core/state/iterator_test.go | 2 +- 
core/state/snapshot/difflayer.go | 2 +- core/state/snapshot/difflayer_test.go | 2 +- core/state/snapshot/disklayer_test.go | 8 ++-- core/state/snapshot/generate.go | 2 +- core/state/snapshot/iteration.md | 60 -------------------------- core/state/snapshot/iterator_binary.go | 2 +- core/state/snapshot/iterator_fast.go | 16 ++----- core/state/snapshot/iterator_test.go | 9 ++-- core/state/snapshot/journal.go | 2 +- core/state/snapshot/sort.go | 56 ------------------------ core/state/state_test.go | 4 +- core/state/statedb_test.go | 20 ++++----- core/state/sync_test.go | 6 +-- core/tx_pool_test.go | 38 ++++++++-------- core/vm/gas_table_test.go | 2 +- les/odr_test.go | 4 +- 18 files changed, 55 insertions(+), 182 deletions(-) delete mode 100644 core/state/snapshot/iteration.md diff --git a/core/blockchain_test.go b/core/blockchain_test.go index de23ead21..4bed1b451 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -144,7 +144,7 @@ func testBlockChainImport(chain types.Blocks, blockchain *BlockChain) error { } return err } - statedb, err := state.New(blockchain.GetBlockByHash(block.ParentHash()).Root(), blockchain.stateCache) + statedb, err := state.New(blockchain.GetBlockByHash(block.ParentHash()).Root(), blockchain.stateCache, nil) if err != nil { return err } diff --git a/core/state/iterator_test.go b/core/state/iterator_test.go index 69f51c4c7..e9946e9b3 100644 --- a/core/state/iterator_test.go +++ b/core/state/iterator_test.go @@ -29,7 +29,7 @@ func TestNodeIteratorCoverage(t *testing.T) { // Create some arbitrary test state to iterate db, root, _ := makeTestState() - state, err := New(root, db) + state, err := New(root, db, nil) if err != nil { t.Fatalf("failed to create state trie at %x: %v", root, err) } diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 3528a04a2..3c1bea421 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -478,7 +478,7 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { storageMap := dl.storageData[accountHash] storageList := make([]common.Hash, 0, len(storageMap)) - for k, _ := range storageMap { + for k := range storageMap { storageList = append(storageList, k) } sort.Sort(hashes(storageList)) diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 80a9b4093..d8212d317 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -167,7 +167,7 @@ func TestInsertAndMerge(t *testing.T) { merged := (child.flatten()).(*diffLayer) { // Check that slot value is present got, _ := merged.Storage(acc, slot) - if exp := []byte{0x01}; bytes.Compare(got, exp) != 0 { + if exp := []byte{0x01}; !bytes.Equal(got, exp) { t.Errorf("merged slot value wrong, got %x, exp %x", got, exp) } } diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go index 30b690454..b8dded0d8 100644 --- a/core/state/snapshot/disklayer_test.go +++ b/core/state/snapshot/disklayer_test.go @@ -310,7 +310,7 @@ func TestDiskPartialMerge(t *testing.T) { t.Helper() blob, err := base.AccountRLP(account) if bytes.Compare(account[:], genMarker) > 0 && err != ErrNotCoveredYet { - t.Fatalf("test %d: post-marker (%x) account access (%x) succeded: %x", i, genMarker, account, blob) + t.Fatalf("test %d: post-marker (%x) account access (%x) succeeded: %x", i, genMarker, account, blob) } if bytes.Compare(account[:], genMarker) <= 0 && !bytes.Equal(blob, data) { t.Fatalf("test %d: pre-marker (%x) 
account access (%x) mismatch: have %x, want %x", i, genMarker, account, blob, data) @@ -326,7 +326,7 @@ func TestDiskPartialMerge(t *testing.T) { t.Helper() blob, err := base.Storage(account, slot) if bytes.Compare(append(account[:], slot[:]...), genMarker) > 0 && err != ErrNotCoveredYet { - t.Fatalf("test %d: post-marker (%x) storage access (%x:%x) succeded: %x", i, genMarker, account, slot, blob) + t.Fatalf("test %d: post-marker (%x) storage access (%x:%x) succeeded: %x", i, genMarker, account, slot, blob) } if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 && !bytes.Equal(blob, data) { t.Fatalf("test %d: pre-marker (%x) storage access (%x:%x) mismatch: have %x, want %x", i, genMarker, account, slot, blob, data) @@ -386,7 +386,7 @@ func TestDiskPartialMerge(t *testing.T) { t.Helper() blob := rawdb.ReadAccountSnapshot(db, account) if bytes.Compare(account[:], genMarker) > 0 && blob != nil { - t.Fatalf("test %d: post-marker (%x) account database access (%x) succeded: %x", i, genMarker, account, blob) + t.Fatalf("test %d: post-marker (%x) account database access (%x) succeeded: %x", i, genMarker, account, blob) } if bytes.Compare(account[:], genMarker) <= 0 && !bytes.Equal(blob, data) { t.Fatalf("test %d: pre-marker (%x) account database access (%x) mismatch: have %x, want %x", i, genMarker, account, blob, data) @@ -406,7 +406,7 @@ func TestDiskPartialMerge(t *testing.T) { t.Helper() blob := rawdb.ReadStorageSnapshot(db, account, slot) if bytes.Compare(append(account[:], slot[:]...), genMarker) > 0 && blob != nil { - t.Fatalf("test %d: post-marker (%x) storage database access (%x:%x) succeded: %x", i, genMarker, account, slot, blob) + t.Fatalf("test %d: post-marker (%x) storage database access (%x:%x) succeeded: %x", i, genMarker, account, slot, blob) } if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 && !bytes.Equal(blob, data) { t.Fatalf("test %d: pre-marker (%x) storage database access (%x:%x) mismatch: have %x, want %x", i, genMarker, account, slot, blob, data) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 0f9e5fae5..8a407e30d 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -93,7 +93,7 @@ func (gs *generatorStats) Log(msg string, marker []byte) { // and generation is continued in the background until done. func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, wiper chan struct{}) *diskLayer { // Wipe any previously existing snapshot from the database if no wiper is - // currenty in progress. + // currently in progress. if wiper == nil { wiper = wipeSnapshot(diskdb, true) } diff --git a/core/state/snapshot/iteration.md b/core/state/snapshot/iteration.md deleted file mode 100644 index ca1962d42..000000000 --- a/core/state/snapshot/iteration.md +++ /dev/null @@ -1,60 +0,0 @@ - -## How the fast iterator works - -Consider the following example, where we have `6` iterators, sorted from -left to right in ascending order. - -Our 'primary' `A` iterator is on the left, containing the elements `[0,1,8]` -``` - A B C D E F - - 0 1 2 4 7 9 - 1 2 9 - 14 13 - 8 8 - 15 15 - - - - 16 - - -``` -When we call `Next` on the primary iterator, we get (ignoring the future keys) - -``` -A B C D E F - -1 1 2 4 7 9 -``` -We detect that we now got an equality between our element and the next element. 
-And we need to continue `Next`ing on the next element - -``` -1 2 2 4 7 9 -``` -And move on: -``` -A B C D E F - -1 2 9 4 7 9 -``` -Now we broke out of the equality, but we need to re-sort the element `C` - -``` -A B D E F C - -1 2 4 7 9 9 -``` - -And after shifting it rightwards, we check equality again, and find `C == F`, and thus -call `Next` on `C` - -``` -A B D E F C - -1 2 4 7 9 - -``` -At this point, `C` was exhausted, and is removed - -``` -A B D E F - -1 2 4 7 9 -``` -And we're done with this step. - diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go index 39288e6fb..7d647ee7b 100644 --- a/core/state/snapshot/iterator_binary.go +++ b/core/state/snapshot/iterator_binary.go @@ -35,7 +35,7 @@ type binaryAccountIterator struct { } // newBinaryAccountIterator creates a simplistic account iterator to step over -// all the accounts in a slow, but eaily verifyable way. +// all the accounts in a slow, but eaily verifiable way. func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { parent, ok := dl.parent.(*diffLayer) if !ok { diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index b5ffab7c8..ef0212ac2 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -41,7 +41,7 @@ func (its weightedAccountIterators) Len() int { return len(its) } // Less implements sort.Interface, returning which of two iterators in the stack // is before the other. func (its weightedAccountIterators) Less(i, j int) bool { - // Order the iterators primarilly by the account hashes + // Order the iterators primarily by the account hashes hashI := its[i].it.Hash() hashJ := its[j].it.Hash() @@ -131,7 +131,7 @@ func (fi *fastAccountIterator) init() { // determine which. // // This whole else-block can be avoided, if we instead - // do an inital priority-sort of the iterators. If we do that, + // do an initial priority-sort of the iterators. If we do that, // then we'll only wind up here if a lower-priority (preferred) iterator // has the same value, and then we will always just continue. 
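Since iteration.md is dropped in this commit, here is a compact stand-in for the behaviour it described: several sorted key streams are merged, the smallest head is emitted each round and duplicates are skipped, so overlapping layers still yield each key exactly once. The streams are loosely based on the removed example, and the real fast iterator keeps its sub-iterators sorted rather than scanning them:

```go
package main

import "fmt"

func main() {
	// Sorted per-layer key streams, loosely following the removed example.
	streams := [][]int{
		{0, 1, 8}, {1, 2, 8}, {2, 9}, {4, 15, 16}, {7, 14, 15}, {9, 13},
	}
	last := -1
	for {
		// Pick the stream with the smallest head (a linear scan here; the
		// real fast iterator keeps its sub-iterators sorted instead).
		best := -1
		for i, s := range streams {
			if len(s) > 0 && (best < 0 || s[0] < streams[best][0]) {
				best = i
			}
		}
		if best < 0 {
			break // all streams exhausted
		}
		head := streams[best][0]
		streams[best] = streams[best][1:]
		if head != last { // skip keys already emitted by another layer
			fmt.Print(head, " ")
			last = head
		}
	}
	fmt.Println() // 0 1 2 4 7 8 9 13 14 15 16
}
```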
// However, it costs an extra sort, so it's probably not better @@ -233,16 +233,8 @@ func (fi *fastAccountIterator) next(idx int) bool { // The elem we're placing it next to has the same value, // so whichever winds up on n+1 will need further iteraton clash = n + 1 - if cur.priority < fi.iterators[n+1].priority { - // We can drop the iterator here - return true - } - // We need to move it one step further - return false - // TODO benchmark which is best, this works too: - //clash = n - //return true - // Doing so should finish the current search earlier + + return cur.priority < fi.iterators[n+1].priority }) fi.move(idx, index) if clash != -1 { diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index 902985cf6..dbfafd73d 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -67,14 +67,11 @@ func (ti *testIterator) Seek(common.Hash) { func (ti *testIterator) Next() bool { ti.values = ti.values[1:] - if len(ti.values) == 0 { - return false - } - return true + return len(ti.values) > 0 } func (ti *testIterator) Error() error { - panic("implement me") + return nil } func (ti *testIterator) Hash() common.Hash { @@ -82,7 +79,7 @@ func (ti *testIterator) Hash() common.Hash { } func (ti *testIterator) Account() []byte { - panic("implement me") + return nil } func (ti *testIterator) Release() {} diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 01b88bae6..8e039606f 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -164,7 +164,7 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { // Journal writes the persistent layer generator stats into a buffer to be stored // in the database as the snapshot journal. func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { - // If the snapshot is currenty being generated, abort it + // If the snapshot is currently being generated, abort it var stats *generatorStats if dl.genAbort != nil { abort := make(chan *generatorStats) diff --git a/core/state/snapshot/sort.go b/core/state/snapshot/sort.go index ee7cc4990..88841231d 100644 --- a/core/state/snapshot/sort.go +++ b/core/state/snapshot/sort.go @@ -34,59 +34,3 @@ func (hs hashes) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) // Swap swaps the elements with indexes i and j. func (hs hashes) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } - -// merge combines two sorted lists of hashes into a combo sorted one. 
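With the constructor-seeded iterators in place, a typical consumer drains the iterator and then checks Error(), since a layer going stale mid-iteration is now reported lazily rather than eagerly. A usage sketch only; forEachAccount is a hypothetical helper, not part of this patch:

```go
package snapshot // sketch assumes it lives next to the Tree type above

import "github.com/ethereum/go-ethereum/common"

// forEachAccount drains an account iterator rooted at the given state root and
// surfaces any deferred iterator error (e.g. ErrSnapshotStale) to the caller.
func forEachAccount(t *Tree, root common.Hash, fn func(hash common.Hash, account []byte)) error {
	it, err := t.AccountIterator(root, common.Hash{})
	if err != nil {
		return err
	}
	defer it.Release()

	for it.Next() {
		fn(it.Hash(), it.Account())
	}
	return it.Error()
}
```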
-func merge(a, b []common.Hash) []common.Hash { - result := make([]common.Hash, len(a)+len(b)) - - i := 0 - for len(a) > 0 && len(b) > 0 { - if bytes.Compare(a[0][:], b[0][:]) < 0 { - result[i] = a[0] - a = a[1:] - } else { - result[i] = b[0] - b = b[1:] - } - i++ - } - for j := 0; j < len(a); j++ { - result[i] = a[j] - i++ - } - for j := 0; j < len(b); j++ { - result[i] = b[j] - i++ - } - return result -} - -// dedupMerge combines two sorted lists of hashes into a combo sorted one, -// and removes duplicates in the process -func dedupMerge(a, b []common.Hash) []common.Hash { - result := make([]common.Hash, len(a)+len(b)) - i := 0 - for len(a) > 0 && len(b) > 0 { - if diff := bytes.Compare(a[0][:], b[0][:]); diff < 0 { - result[i] = a[0] - a = a[1:] - } else { - result[i] = b[0] - b = b[1:] - // If they were equal, progress a too - if diff == 0 { - a = a[1:] - } - } - i++ - } - for j := 0; j < len(a); j++ { - result[i] = a[j] - i++ - } - for j := 0; j < len(b); j++ { - result[i] = b[j] - i++ - } - return result[:i] -} diff --git a/core/state/state_test.go b/core/state/state_test.go index 0c920a9a2..41d9b4655 100644 --- a/core/state/state_test.go +++ b/core/state/state_test.go @@ -36,7 +36,7 @@ type stateTest struct { func newStateTest() *stateTest { db := rawdb.NewMemoryDatabase() - sdb, _ := New(common.Hash{}, NewDatabase(db)) + sdb, _ := New(common.Hash{}, NewDatabase(db), nil) return &stateTest{db: db, state: sdb} } @@ -146,7 +146,7 @@ func TestSnapshotEmpty(t *testing.T) { } func TestSnapshot2(t *testing.T) { - state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) stateobjaddr0 := toAddr([]byte("so0")) stateobjaddr1 := toAddr([]byte("so1")) diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index a065d2c55..ad6aeb22e 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -39,7 +39,7 @@ import ( func TestUpdateLeaks(t *testing.T) { // Create an empty state database db := rawdb.NewMemoryDatabase() - state, _ := New(common.Hash{}, NewDatabase(db)) + state, _ := New(common.Hash{}, NewDatabase(db), nil) // Update it with some accounts for i := byte(0); i < 255; i++ { @@ -73,8 +73,8 @@ func TestIntermediateLeaks(t *testing.T) { // Create two state databases, one transitioning to the final state, the other final from the beginning transDb := rawdb.NewMemoryDatabase() finalDb := rawdb.NewMemoryDatabase() - transState, _ := New(common.Hash{}, NewDatabase(transDb)) - finalState, _ := New(common.Hash{}, NewDatabase(finalDb)) + transState, _ := New(common.Hash{}, NewDatabase(transDb), nil) + finalState, _ := New(common.Hash{}, NewDatabase(finalDb), nil) modify := func(state *StateDB, addr common.Address, i, tweak byte) { state.SetBalance(addr, big.NewInt(int64(11*i)+int64(tweak))) @@ -149,7 +149,7 @@ func TestIntermediateLeaks(t *testing.T) { // https://github.com/ethereum/go-ethereum/pull/15549. func TestCopy(t *testing.T) { // Create a random state test to copy and modify "independently" - orig, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + orig, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) for i := byte(0); i < 255; i++ { obj := orig.GetOrNewStateObject(common.BytesToAddress([]byte{i})) @@ -385,7 +385,7 @@ func (test *snapshotTest) String() string { func (test *snapshotTest) run() bool { // Run all actions and create snapshots. 
var ( - state, _ = New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + state, _ = New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) snapshotRevs = make([]int, len(test.snapshots)) sindex = 0 ) @@ -399,7 +399,7 @@ func (test *snapshotTest) run() bool { // Revert all snapshots in reverse order. Each revert must yield a state // that is equivalent to fresh state with all actions up the snapshot applied. for sindex--; sindex >= 0; sindex-- { - checkstate, _ := New(common.Hash{}, state.Database()) + checkstate, _ := New(common.Hash{}, state.Database(), nil) for _, action := range test.actions[:test.snapshots[sindex]] { action.fn(action, checkstate) } @@ -477,7 +477,7 @@ func TestTouchDelete(t *testing.T) { // TestCopyOfCopy tests that modified objects are carried over to the copy, and the copy of the copy. // See https://github.com/ethereum/go-ethereum/pull/15225#issuecomment-380191512 func TestCopyOfCopy(t *testing.T) { - state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) addr := common.HexToAddress("aaaa") state.SetBalance(addr, big.NewInt(42)) @@ -494,7 +494,7 @@ func TestCopyOfCopy(t *testing.T) { // // See https://github.com/ethereum/go-ethereum/issues/20106. func TestCopyCommitCopy(t *testing.T) { - state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) // Create an account and check if the retrieved balance is correct addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe") @@ -566,7 +566,7 @@ func TestCopyCommitCopy(t *testing.T) { // // See https://github.com/ethereum/go-ethereum/issues/20106. func TestCopyCopyCommitCopy(t *testing.T) { - state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) // Create an account and check if the retrieved balance is correct addr := common.HexToAddress("0xaffeaffeaffeaffeaffeaffeaffeaffeaffeaffe") @@ -656,7 +656,7 @@ func TestCopyCopyCommitCopy(t *testing.T) { // first, but the journal wiped the entire state object on create-revert. func TestDeleteCreateRevert(t *testing.T) { // Create an initial state with a single contract - state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := New(common.Hash{}, NewDatabase(rawdb.NewMemoryDatabase()), nil) addr := toAddr([]byte("so")) state.SetBalance(addr, big.NewInt(1)) diff --git a/core/state/sync_test.go b/core/state/sync_test.go index f4a221bd9..924c8c2f9 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -41,7 +41,7 @@ type testAccount struct { func makeTestState() (Database, common.Hash, []*testAccount) { // Create an empty state db := NewDatabase(rawdb.NewMemoryDatabase()) - state, _ := New(common.Hash{}, db) + state, _ := New(common.Hash{}, db, nil) // Fill it with some arbitrary data accounts := []*testAccount{} @@ -72,7 +72,7 @@ func makeTestState() (Database, common.Hash, []*testAccount) { // account array. 
func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accounts []*testAccount) { // Check root availability and state contents - state, err := New(root, NewDatabase(db)) + state, err := New(root, NewDatabase(db), nil) if err != nil { t.Fatalf("failed to create state trie at %x: %v", root, err) } @@ -113,7 +113,7 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error { if _, err := db.Get(root.Bytes()); err != nil { return nil // Consider a non existent state consistent. } - state, err := New(root, NewDatabase(db)) + state, err := New(root, NewDatabase(db), nil) if err != nil { return err } diff --git a/core/tx_pool_test.go b/core/tx_pool_test.go index 4db3e6dee..a56151eba 100644 --- a/core/tx_pool_test.go +++ b/core/tx_pool_test.go @@ -86,7 +86,7 @@ func pricedDataTransaction(nonce uint64, gaslimit uint64, gasprice *big.Int, key } func setupTxPool() (*TxPool, *ecdsa.PrivateKey) { - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 10000000, new(event.Feed)} key, _ := crypto.GenerateKey() @@ -171,7 +171,7 @@ func (c *testChain) State() (*state.StateDB, error) { // a state change between those fetches. stdb := c.statedb if *c.trigger { - c.statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + c.statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) // simulate that the new head block included tx0 and tx1 c.statedb.SetNonce(c.address, 2) c.statedb.SetBalance(c.address, new(big.Int).SetUint64(params.Ether)) @@ -189,7 +189,7 @@ func TestStateChangeDuringTransactionPoolReset(t *testing.T) { var ( key, _ = crypto.GenerateKey() address = crypto.PubkeyToAddress(key.PublicKey) - statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) trigger = false ) @@ -345,7 +345,7 @@ func TestTransactionChainFork(t *testing.T) { addr := crypto.PubkeyToAddress(key.PublicKey) resetState := func() { - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) statedb.AddBalance(addr, big.NewInt(100000000000000)) pool.chain = &testBlockChain{statedb, 1000000, new(event.Feed)} @@ -374,7 +374,7 @@ func TestTransactionDoubleNonce(t *testing.T) { addr := crypto.PubkeyToAddress(key.PublicKey) resetState := func() { - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) statedb.AddBalance(addr, big.NewInt(100000000000000)) pool.chain = &testBlockChain{statedb, 1000000, new(event.Feed)} @@ -565,7 +565,7 @@ func TestTransactionPostponing(t *testing.T) { t.Parallel() // Create the pool to test the postponing with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} pool := NewTxPool(testTxPoolConfig, params.TestChainConfig, blockchain) @@ -778,7 +778,7 @@ func testTransactionQueueGlobalLimiting(t *testing.T, nolocals bool) { t.Parallel() // Create the pool to test the limit enforcement with - statedb, _ := 
state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -866,7 +866,7 @@ func testTransactionQueueTimeLimiting(t *testing.T, nolocals bool) { evictionInterval = time.Second // Create the pool to test the non-expiration enforcement - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -969,7 +969,7 @@ func TestTransactionPendingGlobalLimiting(t *testing.T) { t.Parallel() // Create the pool to test the limit enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -1071,7 +1071,7 @@ func TestTransactionCapClearsFromAll(t *testing.T) { t.Parallel() // Create the pool to test the limit enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -1105,7 +1105,7 @@ func TestTransactionPendingMinimumAllowance(t *testing.T) { t.Parallel() // Create the pool to test the limit enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -1153,7 +1153,7 @@ func TestTransactionPoolRepricing(t *testing.T) { t.Parallel() // Create the pool to test the pricing enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} pool := NewTxPool(testTxPoolConfig, params.TestChainConfig, blockchain) @@ -1274,7 +1274,7 @@ func TestTransactionPoolRepricingKeepsLocals(t *testing.T) { t.Parallel() // Create the pool to test the pricing enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} pool := NewTxPool(testTxPoolConfig, params.TestChainConfig, blockchain) @@ -1336,7 +1336,7 @@ func TestTransactionPoolUnderpricing(t *testing.T) { t.Parallel() // Create the pool to test the pricing enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -1442,7 +1442,7 @@ func TestTransactionPoolStableUnderpricing(t *testing.T) { t.Parallel() // Create the pool to test the pricing enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := 
state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -1507,7 +1507,7 @@ func TestTransactionDeduplication(t *testing.T) { t.Parallel() // Create the pool to test the pricing enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} pool := NewTxPool(testTxPoolConfig, params.TestChainConfig, blockchain) @@ -1573,7 +1573,7 @@ func TestTransactionReplacement(t *testing.T) { t.Parallel() // Create the pool to test the pricing enforcement with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} pool := NewTxPool(testTxPoolConfig, params.TestChainConfig, blockchain) @@ -1668,7 +1668,7 @@ func testTransactionJournaling(t *testing.T, nolocals bool) { os.Remove(journal) // Create the original pool to inject transaction into the journal - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} config := testTxPoolConfig @@ -1766,7 +1766,7 @@ func TestTransactionStatusCheck(t *testing.T) { t.Parallel() // Create the pool to test the status retrievals with - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) blockchain := &testBlockChain{statedb, 1000000, new(event.Feed)} pool := NewTxPool(testTxPoolConfig, params.TestChainConfig, blockchain) diff --git a/core/vm/gas_table_test.go b/core/vm/gas_table_test.go index 5d443de0e..2d8d3c6bc 100644 --- a/core/vm/gas_table_test.go +++ b/core/vm/gas_table_test.go @@ -81,7 +81,7 @@ func TestEIP2200(t *testing.T) { for i, tt := range eip2200Tests { address := common.BytesToAddress([]byte("contract")) - statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) statedb.CreateAccount(address) statedb.SetCode(address, hexutil.MustDecode(tt.input)) statedb.SetState(address, common.Hash{}, common.BytesToHash([]byte{tt.original})) diff --git a/les/odr_test.go b/les/odr_test.go index 7d1087822..45ed9e065 100644 --- a/les/odr_test.go +++ b/les/odr_test.go @@ -91,7 +91,7 @@ func odrAccounts(ctx context.Context, db ethdb.Database, config *params.ChainCon for _, addr := range acc { if bc != nil { header := bc.GetHeaderByHash(bhash) - st, err = state.New(header.Root, state.NewDatabase(db)) + st, err = state.New(header.Root, state.NewDatabase(db), nil) } else { header := lc.GetHeaderByHash(bhash) st = light.NewState(ctx, header, lc.Odr()) @@ -122,7 +122,7 @@ func odrContractCall(ctx context.Context, db ethdb.Database, config *params.Chai data[35] = byte(i) if bc != nil { header := bc.GetHeaderByHash(bhash) - statedb, err := state.New(header.Root, state.NewDatabase(db)) + statedb, err := state.New(header.Root, state.NewDatabase(db), nil) if err == nil { from := statedb.GetOrNewStateObject(bankAddr) From 92ec07d63bc06241df6b9c8cec6c9d5954a192f5 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Thu, 27 Feb 2020 15:03:10 +0200 Subject: [PATCH 18/28] core/state: fix an account resurrection issue --- core/state/state_object.go | 9 +-------- core/state/statedb.go | 8 ++++---- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/core/state/state_object.go b/core/state/state_object.go index d10caa831..26e0b08f5 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -297,18 +297,11 @@ func (s *stateObject) updateTrie(db Database) Trie { // Retrieve the snapshot storage map for the object var storage map[common.Hash][]byte if s.db.snap != nil { - // Retrieve the old storage map, if available - s.db.snapLock.RLock() + // Retrieve the old storage map, if available, create a new one otherwise storage = s.db.snapStorage[s.addrHash] - s.db.snapLock.RUnlock() - - // If no old storage map was available, create a new one if storage == nil { storage = make(map[common.Hash][]byte) - - s.db.snapLock.Lock() s.db.snapStorage[s.addrHash] = storage - s.db.snapLock.Unlock() } } // Insert all the pending updates into the trie diff --git a/core/state/statedb.go b/core/state/statedb.go index b3ea95a46..d4a91ee71 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,7 +22,6 @@ import ( "fmt" "math/big" "sort" - "sync" "time" "github.com/ethereum/go-ethereum/common" @@ -72,7 +71,6 @@ type StateDB struct { snap snapshot.Snapshot snapAccounts map[common.Hash][]byte snapStorage map[common.Hash]map[common.Hash][]byte - snapLock sync.RWMutex // Lock for the concurrent storage updaters // This map holds 'live' objects, which will get modified while processing a state transition. stateObjects map[common.Address]*stateObject @@ -468,6 +466,10 @@ func (s *StateDB) updateStateObject(obj *stateObject) { // If state snapshotting is active, cache the data til commit if s.snap != nil { + // If the account is an empty resurrection, unmark the storage nil-ness + if storage, ok := s.snapStorage[obj.addrHash]; storage == nil && ok { + delete(s.snapStorage, obj.addrHash) + } s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) } } @@ -484,10 +486,8 @@ func (s *StateDB) deleteStateObject(obj *stateObject) { // If state snapshotting is active, cache the data til commit if s.snap != nil { - s.snapLock.Lock() s.snapAccounts[obj.addrHash] = nil // We need to maintain account deletions explicitly s.snapStorage[obj.addrHash] = nil // We need to maintain storage deletions explicitly - s.snapLock.Unlock() } } From 361a6f08acad506c16cef1a1436b5975478e811f Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 2 Mar 2020 13:46:56 +0100 Subject: [PATCH 19/28] core/tests: test for destroy+recreate contract with storage --- core/blockchain_test.go | 128 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 4bed1b451..72f9898e5 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -2362,3 +2362,131 @@ func TestDeleteCreateRevert(t *testing.T) { t.Fatalf("block %d: failed to insert into chain: %v", n, err) } } + +// TestDeleteRecreate tests a state-transition that contains both deletion +// and recreation of contract state. 
+// Contract A exists, has slots 1 and 2 set +// Tx 1: Selfdestruct A +// Tx 2: Re-create A, set slots 3 and 4 +// Expected outcome is that _all_ slots are cleared from A, due to the selfdestruct, +// and then the new slots exist +func TestDeleteRecreate(t *testing.T) { + var ( + // Generate a canonical chain to act as the main dataset + engine = ethash.NewFaker() + db = rawdb.NewMemoryDatabase() + // A sender who makes transactions, has some funds + key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + address = crypto.PubkeyToAddress(key.PublicKey) + funds = big.NewInt(1000000000) + + aa = common.HexToAddress("0x7217d81b76bdd8707601e959454e3d776aee5f43") + bb = common.HexToAddress("0x000000000000000000000000000000000000bbbb") + aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA + aaCode = []byte{byte(vm.PC), 0xFF} // Code for AA (simple selfdestruct) + ) + // Populate two slots + aaStorage[common.HexToHash("01")] = common.HexToHash("01") + aaStorage[common.HexToHash("02")] = common.HexToHash("02") + + // The bb-code needs to CREATE2 the aa contract. It consists of + // both initcode and deployment code + // initcode: + // 1. Set slots 3=3, 4=4, + // 2. Return aaCode + + initCode := []byte{ + byte(vm.PUSH1), 0x3, // value + byte(vm.PUSH1), 0x3, // location + byte(vm.SSTORE), // Set slot[3] = 1 + byte(vm.PUSH1), 0x4, // value + byte(vm.PUSH1), 0x4, // location + byte(vm.SSTORE), // Set slot[4] = 1 + // Slots are set, now return the code + byte(vm.PUSH2), 0x88, 0xff, // Push code on stack + byte(vm.PUSH1), 0x0, // memory start on stack + byte(vm.MSTORE), + // Code is now in memory. + byte(vm.PUSH1), 0x2, // size + byte(vm.PUSH1), byte(32 - 2), // offset + byte(vm.RETURN), + } + if l := len(initCode); l > 32 { + t.Fatalf("init code is too long for a pushx, need a more elaborate deployer") + } + bbCode := []byte{ + // Push initcode onto stack + byte(vm.PUSH1) + byte(len(initCode)-1)} + bbCode = append(bbCode, initCode...) + bbCode = append(bbCode, []byte{ + byte(vm.PUSH1), 0x0, // memory start on stack + byte(vm.MSTORE), + byte(vm.PUSH1), 0x00, // salt + byte(vm.PUSH1), byte(len(initCode)), // size + byte(vm.PUSH1), byte(32 - len(initCode)), // offset + byte(vm.PUSH1), 0x00, // endowment + byte(vm.CREATE2), + }...) 
+ + gspec := &Genesis{ + Config: params.TestChainConfig, + Alloc: GenesisAlloc{ + address: {Balance: funds}, + // The address 0xAAAAA selfdestructs if called + aa: { + // Code needs to just selfdestruct + Code: aaCode, + Nonce: 1, + Balance: big.NewInt(0), + Storage: aaStorage, + }, + // The contract BB recreates AA + bb: { + Code: bbCode, + Balance: big.NewInt(1), + }, + }, + } + genesis := gspec.MustCommit(db) + + blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 1, func(i int, b *BlockGen) { + b.SetCoinbase(common.Address{1}) + // One transaction to AA, to kill it + tx, _ := types.SignTx(types.NewTransaction(0, aa, + big.NewInt(0), 50000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + b.AddTx(tx) + // One transaction to BB, to recreate AA + tx, _ = types.SignTx(types.NewTransaction(1, bb, + big.NewInt(0), 100000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + b.AddTx(tx) + }) + // Import the canonical chain + diskdb := rawdb.NewMemoryDatabase() + gspec.MustCommit(diskdb) + chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{ + Debug: true, + Tracer: vm.NewJSONLogger(nil, os.Stdout), + }, nil) + if err != nil { + t.Fatalf("failed to create tester chain: %v", err) + } + if n, err := chain.InsertChain(blocks); err != nil { + t.Fatalf("block %d: failed to insert into chain: %v", n, err) + } + statedb, _ := chain.State() + + // If all is correct, then slot 1 and 2 are zero + if got, exp := statedb.GetState(aa, common.HexToHash("01")), (common.Hash{}); got != exp { + t.Errorf("got %x exp %x", got, exp) + } + if got, exp := statedb.GetState(aa, common.HexToHash("02")), (common.Hash{}); got != exp { + t.Errorf("got %x exp %x", got, exp) + } + // Also, 3 and 4 should be set + if got, exp := statedb.GetState(aa, common.HexToHash("03")), common.HexToHash("03"); got != exp { + t.Fatalf("got %x exp %x", got, exp) + } + if got, exp := statedb.GetState(aa, common.HexToHash("04")), common.HexToHash("04"); got != exp { + t.Fatalf("got %x exp %x", got, exp) + } +} From fe8347ea8a596ed233911340368ffd607fc23f70 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 2 Mar 2020 14:06:44 +0100 Subject: [PATCH 20/28] squashme --- core/blockchain_test.go | 77 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 72f9898e5..69a245322 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -2363,14 +2363,14 @@ func TestDeleteCreateRevert(t *testing.T) { } } -// TestDeleteRecreate tests a state-transition that contains both deletion +// TestDeleteRecreateSlots tests a state-transition that contains both deletion // and recreation of contract state. 
// Contract A exists, has slots 1 and 2 set // Tx 1: Selfdestruct A // Tx 2: Re-create A, set slots 3 and 4 // Expected outcome is that _all_ slots are cleared from A, due to the selfdestruct, // and then the new slots exist -func TestDeleteRecreate(t *testing.T) { +func TestDeleteRecreateSlots(t *testing.T) { var ( // Generate a canonical chain to act as the main dataset engine = ethash.NewFaker() @@ -2490,3 +2490,76 @@ func TestDeleteRecreate(t *testing.T) { t.Fatalf("got %x exp %x", got, exp) } } + +// TestDeleteRecreateAccount tests a state-transition that contains deletion of a +// contract with storage, and a recreate of the same contract via a +// regular value-transfer +// Expected outcome is that _all_ slots are cleared from A +func TestDeleteRecreateAccount(t *testing.T) { + var ( + // Generate a canonical chain to act as the main dataset + engine = ethash.NewFaker() + db = rawdb.NewMemoryDatabase() + // A sender who makes transactions, has some funds + key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + address = crypto.PubkeyToAddress(key.PublicKey) + funds = big.NewInt(1000000000) + + aa = common.HexToAddress("0x7217d81b76bdd8707601e959454e3d776aee5f43") + aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA + aaCode = []byte{byte(vm.PC), 0xFF} // Code for AA (simple selfdestruct) + ) + // Populate two slots + aaStorage[common.HexToHash("01")] = common.HexToHash("01") + aaStorage[common.HexToHash("02")] = common.HexToHash("02") + + gspec := &Genesis{ + Config: params.TestChainConfig, + Alloc: GenesisAlloc{ + address: {Balance: funds}, + // The address 0xAAAAA selfdestructs if called + aa: { + // Code needs to just selfdestruct + Code: aaCode, + Nonce: 1, + Balance: big.NewInt(0), + Storage: aaStorage, + }, + }, + } + genesis := gspec.MustCommit(db) + + blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 1, func(i int, b *BlockGen) { + b.SetCoinbase(common.Address{1}) + // One transaction to AA, to kill it + tx, _ := types.SignTx(types.NewTransaction(0, aa, + big.NewInt(0), 50000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + b.AddTx(tx) + // One transaction to AA, to recreate it (but without storage + tx, _ = types.SignTx(types.NewTransaction(1, aa, + big.NewInt(1), 100000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + b.AddTx(tx) + }) + // Import the canonical chain + diskdb := rawdb.NewMemoryDatabase() + gspec.MustCommit(diskdb) + chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{ + Debug: true, + Tracer: vm.NewJSONLogger(nil, os.Stdout), + }, nil) + if err != nil { + t.Fatalf("failed to create tester chain: %v", err) + } + if n, err := chain.InsertChain(blocks); err != nil { + t.Fatalf("block %d: failed to insert into chain: %v", n, err) + } + statedb, _ := chain.State() + + // If all is correct, then both slots are zero + if got, exp := statedb.GetState(aa, common.HexToHash("01")), (common.Hash{}); got != exp { + t.Errorf("got %x exp %x", got, exp) + } + if got, exp := statedb.GetState(aa, common.HexToHash("02")), (common.Hash{}); got != exp { + t.Errorf("got %x exp %x", got, exp) + } +} From 6e05ccd845da26774774185956b3fd67966894ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 3 Mar 2020 09:10:23 +0200 Subject: [PATCH 21/28] core/state/snapshot, tests: sync snap gen + snaps in consensus tests --- cmd/evm/staterunner.go | 2 +- core/blockchain.go | 4 +++- core/state/snapshot/disklayer.go | 5 +++-- 
core/state/snapshot/generate.go | 14 ++++++------ core/state/snapshot/journal.go | 1 + core/state/snapshot/snapshot.go | 37 ++++++++++++++++++++++++++------ eth/tracers/tracers_test.go | 4 ++-- tests/block_test.go | 8 ++++--- tests/block_test_util.go | 9 ++++++-- tests/state_test.go | 13 ++++++++--- tests/state_test_util.go | 19 ++++++++++------ tests/transaction_test_util.go | 1 - tests/vm_test.go | 5 ++++- tests/vm_test_util.go | 4 ++-- 14 files changed, 90 insertions(+), 36 deletions(-) diff --git a/cmd/evm/staterunner.go b/cmd/evm/staterunner.go index cef2aedb5..52c1eca71 100644 --- a/cmd/evm/staterunner.go +++ b/cmd/evm/staterunner.go @@ -96,7 +96,7 @@ func stateTestCmd(ctx *cli.Context) error { for _, st := range test.Subtests() { // Run the test and aggregate the result result := &StatetestResult{Name: key, Fork: st.Fork, Pass: true} - state, err := test.Run(st, cfg) + state, err := test.Run(st, cfg, false) // print state root for evmlab tracing if ctx.GlobalBool(MachineFlag.Name) && state != nil { fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%x\"}\n", state.IntermediateRoot(false)) diff --git a/core/blockchain.go b/core/blockchain.go index 491eccecd..b0309ef70 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -121,6 +121,8 @@ type CacheConfig struct { TrieDirtyDisabled bool // Whether to disable trie write caching and GC altogether (archive node) TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk SnapshotLimit int // Memory allowance (MB) to use for caching snapshot entries in memory + + SnapshotWait bool // Wait for snapshot construction on startup. TODO(karalabe): This is a dirty hack for testing, nuke it } // BlockChain represents the canonical chain given a database with a genesis @@ -303,7 +305,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } // Load any existing snapshot, regenerating it if loading failed if bc.cacheConfig.SnapshotLimit > 0 { - bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root()) + bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root(), !bc.cacheConfig.SnapshotWait) } // Take ownership of this particular state go bc.update() diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 0c4c3deb1..3266424a8 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -37,8 +37,9 @@ type diskLayer struct { root common.Hash // Root hash of the base snapshot stale bool // Signals that the layer became stale (state progressed) - genMarker []byte // Marker for the state that's indexed during initial layer generation - genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer + genMarker []byte // Marker for the state that's indexed during initial layer generation + genPending chan struct{} // Notification channel when generation is done (test synchronicity) + genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer lock sync.RWMutex } diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 8a407e30d..4b017fe69 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -101,12 +101,13 @@ func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache i rawdb.WriteSnapshotRoot(diskdb, root) base := &diskLayer{ - diskdb: diskdb, - triedb: triedb, - 
root: root, - cache: fastcache.New(cache * 1024 * 1024), - genMarker: []byte{}, // Initialized but empty! - genAbort: make(chan chan *generatorStats), + diskdb: diskdb, + triedb: triedb, + root: root, + cache: fastcache.New(cache * 1024 * 1024), + genMarker: []byte{}, // Initialized but empty! + genPending: make(chan struct{}), + genAbort: make(chan chan *generatorStats), } go base.generate(&generatorStats{wiping: wiper, start: time.Now()}) return base @@ -252,6 +253,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { dl.lock.Lock() dl.genMarker = nil + close(dl.genPending) dl.lock.Unlock() // Someone will be looking for us, wait it out diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 8e039606f..c42a26d21 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -108,6 +108,7 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, if base.genMarker == nil { base.genMarker = []byte{} } + base.genPending = make(chan struct{}) base.genAbort = make(chan chan *generatorStats) var origin uint64 diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index ad602bbbb..d031dd2c1 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -164,7 +164,7 @@ type Tree struct { // If the snapshot is missing or inconsistent, the entirety is deleted and will // be reconstructed from scratch based on the tries in the key-value store, on a // background thread. -func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) *Tree { +func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool) *Tree { // Create a new, empty snapshot tree snap := &Tree{ diskdb: diskdb, @@ -172,6 +172,9 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm cache: cache, layers: make(map[common.Hash]snapshot), } + if !async { + defer snap.waitBuild() + } // Attempt to load a previously persisted snapshot and rebuild one if failed head, err := loadSnapshot(diskdb, triedb, cache, root) if err != nil { @@ -187,6 +190,27 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm return snap } +// waitBuild blocks until the snapshot finishes rebuilding. This method is meant +// to be used by tests to ensure we're testing what we believe we are. +func (t *Tree) waitBuild() { + // Find the rebuild termination channel + var done chan struct{} + + t.lock.RLock() + for _, layer := range t.layers { + if layer, ok := layer.(*diskLayer); ok { + done = layer.genPending + break + } + } + t.lock.RUnlock() + + // Wait until the snapshot is generated + if done != nil { + <-done + } +} + // Snapshot retrieves a snapshot belonging to the given block root, or nil if no // snapshot is maintained for that block. func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot { @@ -477,11 +501,12 @@ func diffToDisk(bottom *diffLayer) *diskLayer { log.Crit("Failed to write leftover snapshot", "err", err) } res := &diskLayer{ - root: bottom.root, - cache: base.cache, - diskdb: base.diskdb, - triedb: base.triedb, - genMarker: base.genMarker, + root: bottom.root, + cache: base.cache, + diskdb: base.diskdb, + triedb: base.triedb, + genMarker: base.genMarker, + genPending: base.genPending, } // If snapshot generation hasn't finished yet, port over all the starts and // continue where the previous round left off. 
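For context on the test-synchronization change above: the genPending/waitBuild wiring boils down to a close-once channel that callers can block on while the background generator runs. Below is a minimal, hedged sketch of that pattern — it uses simplified stand-in types rather than the real diskLayer/Tree from core/state/snapshot, and is not part of the patch itself.

package main

import (
	"fmt"
	"time"
)

// diskLayer is a simplified stand-in for the snapshot disk layer; only the
// genPending idea from the patch is reproduced here.
type diskLayer struct {
	genPending chan struct{} // closed exactly once, when generation finishes
}

// generate stands in for the background snapshot generator: it does its work
// and then closes genPending so every waiter is released.
func (dl *diskLayer) generate() {
	time.Sleep(100 * time.Millisecond) // placeholder for iterating the state trie
	close(dl.genPending)
}

// waitBuild blocks until generation is done, mirroring Tree.waitBuild above.
func (dl *diskLayer) waitBuild() {
	<-dl.genPending
}

func main() {
	dl := &diskLayer{genPending: make(chan struct{})}
	go dl.generate()
	dl.waitBuild() // roughly what SnapshotWait asks of NewBlockChain in tests
	fmt.Println("snapshot generation finished")
}

Closing the channel (rather than sending on it) lets any number of waiters observe completion, which is why the patch closes genPending at the end of generate() and allocates a fresh channel when a snapshot is reloaded from the journal.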
diff --git a/eth/tracers/tracers_test.go b/eth/tracers/tracers_test.go index 69eb80a5c..289c6c5bb 100644 --- a/eth/tracers/tracers_test.go +++ b/eth/tracers/tracers_test.go @@ -168,7 +168,7 @@ func TestPrestateTracerCreate2(t *testing.T) { Code: []byte{}, Balance: big.NewInt(500000000000000), } - statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), alloc) + statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), alloc, false) // Create the tracer, the EVM environment and run it tracer, err := New("prestateTracer") @@ -242,7 +242,7 @@ func TestCallTracer(t *testing.T) { GasLimit: uint64(test.Context.GasLimit), GasPrice: tx.GasPrice(), } - statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc) + statedb := tests.MakePreState(rawdb.NewMemoryDatabase(), test.Genesis.Alloc, false) // Create the tracer, the EVM environment and run it tracer, err := New("callTracer") diff --git a/tests/block_test.go b/tests/block_test.go index 3a55e4c34..8fa90e3e3 100644 --- a/tests/block_test.go +++ b/tests/block_test.go @@ -45,11 +45,13 @@ func TestBlockchain(t *testing.T) { bt.skipLoad(`.*randomStatetest94.json.*`) bt.walk(t, blockTestDir, func(t *testing.T, name string, test *BlockTest) { - if err := bt.checkFailure(t, name, test.Run()); err != nil { - t.Error(err) + if err := bt.checkFailure(t, name+"/trie", test.Run(false)); err != nil { + t.Errorf("test without snapshotter failed: %v", err) + } + if err := bt.checkFailure(t, name+"/snap", test.Run(true)); err != nil { + t.Errorf("test with snapshotter failed: %v", err) } }) - // There is also a LegacyTests folder, containing blockchain tests generated // prior to Istanbul. However, they are all derived from GeneralStateTests, // which run natively, so there's no reason to run them here. diff --git a/tests/block_test_util.go b/tests/block_test_util.go index b5f1de3ef..1ae986e3c 100644 --- a/tests/block_test_util.go +++ b/tests/block_test_util.go @@ -94,7 +94,7 @@ type btHeaderMarshaling struct { Timestamp math.HexOrDecimal64 } -func (t *BlockTest) Run() error { +func (t *BlockTest) Run(snapshotter bool) error { config, ok := Forks[t.json.Network] if !ok { return UnsupportedForkError{t.json.Network} @@ -118,7 +118,12 @@ func (t *BlockTest) Run() error { } else { engine = ethash.NewShared() } - chain, err := core.NewBlockChain(db, &core.CacheConfig{TrieCleanLimit: 0}, config, engine, vm.Config{}, nil) + cache := &core.CacheConfig{TrieCleanLimit: 0} + if snapshotter { + cache.SnapshotLimit = 1 + cache.SnapshotWait = true + } + chain, err := core.NewBlockChain(db, cache, config, engine, vm.Config{}, nil) if err != nil { return err } diff --git a/tests/state_test.go b/tests/state_test.go index f9499d4a8..c0a90b3a4 100644 --- a/tests/state_test.go +++ b/tests/state_test.go @@ -63,10 +63,17 @@ func TestState(t *testing.T) { subtest := subtest key := fmt.Sprintf("%s/%d", subtest.Fork, subtest.Index) name := name + "/" + key - t.Run(key, func(t *testing.T) { + + t.Run(key+"/trie", func(t *testing.T) { withTrace(t, test.gasLimit(subtest), func(vmconfig vm.Config) error { - _, err := test.Run(subtest, vmconfig) - return st.checkFailure(t, name, err) + _, err := test.Run(subtest, vmconfig, false) + return st.checkFailure(t, name+"/trie", err) + }) + }) + t.Run(key+"/snap", func(t *testing.T) { + withTrace(t, test.gasLimit(subtest), func(vmconfig vm.Config) error { + _, err := test.Run(subtest, vmconfig, true) + return st.checkFailure(t, name+"/snap", err) }) }) } diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 
a10d044cd..5e5b96d52 100644 --- a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -24,6 +24,8 @@ import ( "strconv" "strings" + "github.com/ethereum/go-ethereum/core/state/snapshot" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/common/math" @@ -145,8 +147,8 @@ func (t *StateTest) Subtests() []StateSubtest { } // Run executes a specific subtest and verifies the post-state and logs -func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config) (*state.StateDB, error) { - statedb, root, err := t.RunNoVerify(subtest, vmconfig) +func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bool) (*state.StateDB, error) { + statedb, root, err := t.RunNoVerify(subtest, vmconfig, snapshotter) if err != nil { return statedb, err } @@ -163,14 +165,14 @@ func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config) (*state.StateD } // RunNoVerify runs a specific subtest and returns the statedb and post-state root -func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config) (*state.StateDB, common.Hash, error) { +func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool) (*state.StateDB, common.Hash, error) { config, eips, err := getVMConfig(subtest.Fork) if err != nil { return nil, common.Hash{}, UnsupportedForkError{subtest.Fork} } vmconfig.ExtraEips = eips block := t.genesis(config).ToBlock(nil) - statedb := MakePreState(rawdb.NewMemoryDatabase(), t.json.Pre) + statedb := MakePreState(rawdb.NewMemoryDatabase(), t.json.Pre, snapshotter) post := t.json.Post[subtest.Fork][subtest.Index] msg, err := t.json.Tx.toMessage(post) @@ -204,7 +206,7 @@ func (t *StateTest) gasLimit(subtest StateSubtest) uint64 { return t.json.Tx.GasLimit[t.json.Post[subtest.Fork][subtest.Index].Indexes.Gas] } -func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB { +func MakePreState(db ethdb.Database, accounts core.GenesisAlloc, snapshotter bool) *state.StateDB { sdb := state.NewDatabase(db) statedb, _ := state.New(common.Hash{}, sdb, nil) for addr, a := range accounts { @@ -217,7 +219,12 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB } // Commit and re-open to start with a clean state. 
root, _ := statedb.Commit(false) - statedb, _ = state.New(root, sdb, nil) + + var snaps *snapshot.Tree + if snapshotter { + snaps = snapshot.New(db, sdb.TrieDB(), 1, root, false) + } + statedb, _ = state.New(root, sdb, snaps) return statedb } diff --git a/tests/transaction_test_util.go b/tests/transaction_test_util.go index 43debae83..aea90535c 100644 --- a/tests/transaction_test_util.go +++ b/tests/transaction_test_util.go @@ -45,7 +45,6 @@ type ttFork struct { } func (tt *TransactionTest) Run(config *params.ChainConfig) error { - validateTx := func(rlpData hexutil.Bytes, signer types.Signer, isHomestead bool, isIstanbul bool) (*common.Address, *common.Hash, error) { tx := new(types.Transaction) if err := rlp.DecodeBytes(rlpData, tx); err != nil { diff --git a/tests/vm_test.go b/tests/vm_test.go index 441483dff..fb839827a 100644 --- a/tests/vm_test.go +++ b/tests/vm_test.go @@ -30,7 +30,10 @@ func TestVM(t *testing.T) { vmt.walk(t, vmTestDir, func(t *testing.T, name string, test *VMTest) { withTrace(t, test.json.Exec.GasLimit, func(vmconfig vm.Config) error { - return vmt.checkFailure(t, name, test.Run(vmconfig)) + return vmt.checkFailure(t, name+"/trie", test.Run(vmconfig, false)) + }) + withTrace(t, test.json.Exec.GasLimit, func(vmconfig vm.Config) error { + return vmt.checkFailure(t, name+"/snap", test.Run(vmconfig, true)) }) }) } diff --git a/tests/vm_test_util.go b/tests/vm_test_util.go index 91566c47e..9acbe59f4 100644 --- a/tests/vm_test_util.go +++ b/tests/vm_test_util.go @@ -78,8 +78,8 @@ type vmExecMarshaling struct { GasPrice *math.HexOrDecimal256 } -func (t *VMTest) Run(vmconfig vm.Config) error { - statedb := MakePreState(rawdb.NewMemoryDatabase(), t.json.Pre) +func (t *VMTest) Run(vmconfig vm.Config, snapshotter bool) error { + statedb := MakePreState(rawdb.NewMemoryDatabase(), t.json.Pre, snapshotter) ret, gasRemaining, err := t.exec(statedb, vmconfig) if t.json.GasRemaining == nil { From a4cf279494f53276cf3576ae89b14b58efe644fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 3 Mar 2020 15:52:00 +0200 Subject: [PATCH 22/28] core/state: extend snapshotter to handle account resurrections --- core/blockchain.go | 1 + core/blockchain_test.go | 10 +- core/state/snapshot/difflayer.go | 142 +++++++++++++++-------- core/state/snapshot/difflayer_test.go | 157 ++++++++++++++++---------- core/state/snapshot/disklayer.go | 4 +- core/state/snapshot/disklayer_test.go | 30 ++--- core/state/snapshot/iterator_test.go | 101 +++++++++-------- core/state/snapshot/journal.go | 22 +++- core/state/snapshot/snapshot.go | 63 ++++++----- core/state/snapshot/snapshot_test.go | 28 ++--- core/state/statedb.go | 45 ++++---- core/vm/opcodes.go | 13 +-- 12 files changed, 365 insertions(+), 251 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index b0309ef70..de0d4f399 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -198,6 +198,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par TrieDirtyLimit: 256, TrieTimeLimit: 5 * time.Minute, SnapshotLimit: 256, + SnapshotWait: true, } } bodyCache, _ := lru.New(bodyCacheLimit) diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 69a245322..5e2a21023 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -2315,7 +2315,7 @@ func TestDeleteCreateRevert(t *testing.T) { // The address 0xAAAAA selfdestructs if called aa: { // Code needs to just selfdestruct - Code: []byte{byte(vm.PC), 0xFF}, + Code: []byte{byte(vm.PC), byte(vm.SELFDESTRUCT)}, Nonce: 
1, Balance: big.NewInt(0), }, @@ -2382,8 +2382,8 @@ func TestDeleteRecreateSlots(t *testing.T) { aa = common.HexToAddress("0x7217d81b76bdd8707601e959454e3d776aee5f43") bb = common.HexToAddress("0x000000000000000000000000000000000000bbbb") - aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA - aaCode = []byte{byte(vm.PC), 0xFF} // Code for AA (simple selfdestruct) + aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA + aaCode = []byte{byte(vm.PC), byte(vm.SELFDESTRUCT)} // Code for AA (simple selfdestruct) ) // Populate two slots aaStorage[common.HexToHash("01")] = common.HexToHash("01") @@ -2506,8 +2506,8 @@ func TestDeleteRecreateAccount(t *testing.T) { funds = big.NewInt(1000000000) aa = common.HexToAddress("0x7217d81b76bdd8707601e959454e3d776aee5f43") - aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA - aaCode = []byte{byte(vm.PC), 0xFF} // Code for AA (simple selfdestruct) + aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA + aaCode = []byte{byte(vm.PC), byte(vm.SELFDESTRUCT)} // Code for AA (simple selfdestruct) ) // Populate two slots aaStorage[common.HexToHash("01")] = common.HexToHash("01") diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 3c1bea421..0915fb6bc 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -27,7 +27,6 @@ import ( "time" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/steakknife/bloomfilter" ) @@ -68,17 +67,28 @@ var ( // entry count). bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2)) - // bloomHashesOffset is a runtime constant which determines which part of the + // the bloom offsets are runtime constants which determines which part of the // the account/storage hash the hasher functions looks at, to determine the // bloom key for an account/slot. This is randomized at init(), so that the // global population of nodes do not all display the exact same behaviour with // regards to bloom content - bloomHasherOffset = 0 + bloomDestructHasherOffset = 0 + bloomAccountHasherOffset = 0 + bloomStorageHasherOffset = 0 ) func init() { - // Init bloomHasherOffset in the range [0:24] (requires 8 bytes) - bloomHasherOffset = rand.Intn(25) + // Init the bloom offsets in the range [0:24] (requires 8 bytes) + bloomDestructHasherOffset = rand.Intn(25) + bloomAccountHasherOffset = rand.Intn(25) + bloomStorageHasherOffset = rand.Intn(25) + + // The destruct and account blooms must be different, as the storage slots + // will check for destruction too for every bloom miss. It should not collide + // with modified accounts. + for bloomAccountHasherOffset == bloomDestructHasherOffset { + bloomAccountHasherOffset = rand.Intn(25) + } } // diffLayer represents a collection of modifications made to a state snapshot @@ -95,6 +105,7 @@ type diffLayer struct { root common.Hash // Root hash to which this snapshot diff belongs to stale uint32 // Signals that the layer became stale (state progressed) + destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. 
Any existing lists are sorted if non-nil @@ -105,6 +116,20 @@ type diffLayer struct { lock sync.RWMutex } +// destructBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert a destruct +// event into a 64 bit mini hash. +type destructBloomHasher common.Hash + +func (h destructBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h destructBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h destructBloomHasher) Reset() { panic("not implemented") } +func (h destructBloomHasher) BlockSize() int { panic("not implemented") } +func (h destructBloomHasher) Size() int { return 8 } +func (h destructBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[bloomDestructHasherOffset : bloomDestructHasherOffset+8]) +} + // accountBloomHasher is a wrapper around a common.Hash to satisfy the interface // API requirements of the bloom library used. It's used to convert an account // hash into a 64 bit mini hash. @@ -116,7 +141,7 @@ func (h accountBloomHasher) Reset() { panic("not impl func (h accountBloomHasher) BlockSize() int { panic("not implemented") } func (h accountBloomHasher) Size() int { return 8 } func (h accountBloomHasher) Sum64() uint64 { - return binary.BigEndian.Uint64(h[bloomHasherOffset : bloomHasherOffset+8]) + return binary.BigEndian.Uint64(h[bloomAccountHasherOffset : bloomAccountHasherOffset+8]) } // storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface @@ -130,17 +155,18 @@ func (h storageBloomHasher) Reset() { panic("not impl func (h storageBloomHasher) BlockSize() int { panic("not implemented") } func (h storageBloomHasher) Size() int { return 8 } func (h storageBloomHasher) Sum64() uint64 { - return binary.BigEndian.Uint64(h[0][bloomHasherOffset:bloomHasherOffset+8]) ^ - binary.BigEndian.Uint64(h[1][bloomHasherOffset:bloomHasherOffset+8]) + return binary.BigEndian.Uint64(h[0][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) ^ + binary.BigEndian.Uint64(h[1][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) } // newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low // level persistent database or a hierarchical diff already. 
-func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { +func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { // Create the new layer with some pre-allocated data segments dl := &diffLayer{ parent: parent, root: root, + destructSet: destructs, accountData: accounts, storageData: storage, } @@ -152,6 +178,17 @@ func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][] default: panic("unknown parent type") } + // Sanity check that accounts or storage slots are never nil + for accountHash, blob := range accounts { + if blob == nil { + panic(fmt.Sprintf("account %#x nil", accountHash)) + } + } + for accountHash, slots := range storage { + if slots == nil { + panic(fmt.Sprintf("storage %#x nil", accountHash)) + } + } // Determine memory size and track the dirty writes for _, data := range accounts { dl.memory += uint64(common.HashLength + len(data)) @@ -159,24 +196,11 @@ func newDiffLayer(parent snapshot, root common.Hash, accounts map[common.Hash][] } // Fill the storage hashes and sort them for the iterator dl.storageList = make(map[common.Hash][]common.Hash) - - for accountHash, slots := range storage { - // If the slots are nil, sanity check that it's a deleted account - if slots == nil { - // Ensure that the account was just marked as deleted - if account, ok := accounts[accountHash]; account != nil || !ok { - panic(fmt.Sprintf("storage in %#x nil, but account conflicts (%#x, exists: %v)", accountHash, account, ok)) - } - // Everything ok, store the deletion mark and continue - dl.storageList[accountHash] = nil - continue - } - // Storage slots are not nil so entire contract was not deleted, ensure the - // account was just updated. - if account, ok := accounts[accountHash]; account == nil || !ok { - log.Error(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) - } - // Determine memory size and track the dirty writes + for accountHash := range destructs { + dl.storageList[accountHash] = nil + } + // Determine memory size and track the dirty writes + for _, slots := range storage { for _, data := range slots { dl.memory += uint64(common.HashLength + len(data)) snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) @@ -208,6 +232,9 @@ func (dl *diffLayer) rebloom(origin *diskLayer) { dl.diffed, _ = bloomfilter.New(uint64(bloomSize), uint64(bloomFuncs)) } // Iterate over all the accounts and storage slots and index them + for hash := range dl.destructSet { + dl.diffed.Add(destructBloomHasher(hash)) + } for hash := range dl.accountData { dl.diffed.Add(accountBloomHasher(hash)) } @@ -265,6 +292,9 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { // all the maps in all the layers below dl.lock.RLock() hit := dl.diffed.Contains(accountBloomHasher(hash)) + if !hit { + hit = dl.diffed.Contains(destructBloomHasher(hash)) + } dl.lock.RUnlock() // If the bloom filter misses, don't even bother with traversing the memory @@ -289,19 +319,22 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) { if dl.Stale() { return nil, ErrSnapshotStale } - // If the account is known locally, return it. Note, a nil account means it was - // deleted, and is a different notion than an unknown account! 
+ // If the account is known locally, return it if data, ok := dl.accountData[hash]; ok { snapshotDirtyAccountHitMeter.Mark(1) snapshotDirtyAccountHitDepthHist.Update(int64(depth)) - if n := len(data); n > 0 { - snapshotDirtyAccountReadMeter.Mark(int64(n)) - } else { - snapshotDirtyAccountInexMeter.Mark(1) - } + snapshotDirtyAccountReadMeter.Mark(int64(len(data))) snapshotBloomAccountTrueHitMeter.Mark(1) return data, nil } + // If the account is known locally, but deleted, return it + if _, ok := dl.destructSet[hash]; ok { + snapshotDirtyAccountHitMeter.Mark(1) + snapshotDirtyAccountHitDepthHist.Update(int64(depth)) + snapshotDirtyAccountInexMeter.Mark(1) + snapshotBloomAccountTrueHitMeter.Mark(1) + return nil, nil + } // Account unknown to this diff, resolve from parent if diff, ok := dl.parent.(*diffLayer); ok { return diff.accountRLP(hash, depth+1) @@ -319,6 +352,9 @@ func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro // all the maps in all the layers below dl.lock.RLock() hit := dl.diffed.Contains(storageBloomHasher{accountHash, storageHash}) + if !hit { + hit = dl.diffed.Contains(destructBloomHasher(accountHash)) + } dl.lock.RUnlock() // If the bloom filter misses, don't even bother with traversing the memory @@ -343,16 +379,8 @@ func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([ if dl.Stale() { return nil, ErrSnapshotStale } - // If the account is known locally, try to resolve the slot locally. Note, a nil - // account means it was deleted, and is a different notion than an unknown account! + // If the account is known locally, try to resolve the slot locally if storage, ok := dl.storageData[accountHash]; ok { - if storage == nil { - snapshotDirtyStorageHitMeter.Mark(1) - snapshotDirtyStorageHitDepthHist.Update(int64(depth)) - snapshotDirtyStorageInexMeter.Mark(1) - snapshotBloomStorageTrueHitMeter.Mark(1) - return nil, nil - } if data, ok := storage[storageHash]; ok { snapshotDirtyStorageHitMeter.Mark(1) snapshotDirtyStorageHitDepthHist.Update(int64(depth)) @@ -365,6 +393,14 @@ func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([ return data, nil } } + // If the account is known locally, but deleted, return an empty slot + if _, ok := dl.destructSet[accountHash]; ok { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + snapshotDirtyStorageInexMeter.Mark(1) + snapshotBloomStorageTrueHitMeter.Mark(1) + return nil, nil + } // Storage slot unknown to this diff, resolve from parent if diff, ok := dl.parent.(*diffLayer); ok { return diff.storage(accountHash, storageHash, depth+1) @@ -376,8 +412,8 @@ func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([ // Update creates a new layer on top of the existing snapshot diff tree with // the specified data items. 
-func (dl *diffLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { - return newDiffLayer(dl, blockRoot, accounts, storage) +func (dl *diffLayer) Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, blockRoot, destructs, accounts, storage) } // flatten pushes all data from this point downwards, flattening everything into @@ -403,14 +439,18 @@ func (dl *diffLayer) flatten() snapshot { panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo } // Overwrite all the updated accounts blindly, merge the sorted list + for hash := range dl.destructSet { + parent.destructSet[hash] = struct{}{} + delete(parent.accountData, hash) + delete(parent.storageData, hash) + } for hash, data := range dl.accountData { parent.accountData[hash] = data } // Overwrite all the updated storage slots (individually) for accountHash, storage := range dl.storageData { - // If storage didn't exist (or was deleted) in the parent; or if the storage - // was freshly deleted in the child, overwrite blindly - if parent.storageData[accountHash] == nil || storage == nil { + // If storage didn't exist (or was deleted) in the parent, overwrite blindly + if _, ok := parent.storageData[accountHash]; !ok { parent.storageData[accountHash] = storage continue } @@ -426,6 +466,7 @@ func (dl *diffLayer) flatten() snapshot { parent: parent.parent, origin: parent.origin, root: dl.root, + destructSet: parent.destructSet, accountData: parent.accountData, storageData: parent.storageData, storageList: make(map[common.Hash][]common.Hash), @@ -451,7 +492,10 @@ func (dl *diffLayer) AccountList() []common.Hash { dl.lock.Lock() defer dl.lock.Unlock() - dl.accountList = make([]common.Hash, 0, len(dl.accountData)) + dl.accountList = make([]common.Hash, 0, len(dl.destructSet)+len(dl.accountData)) + for hash := range dl.destructSet { + dl.accountList = append(dl.accountList, hash) + } for hash := range dl.accountData { dl.accountList = append(dl.accountList, hash) } diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index d8212d317..61d2ed9c0 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -30,8 +30,9 @@ import ( // TestMergeBasics tests some simple merges func TestMergeBasics(t *testing.T) { var ( - accounts = make(map[common.Hash][]byte) - storage = make(map[common.Hash]map[common.Hash][]byte) + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) ) // Fill up a parent for i := 0; i < 100; i++ { @@ -39,7 +40,10 @@ func TestMergeBasics(t *testing.T) { data := randomAccount() accounts[h] = data - if rand.Intn(20) < 10 { + if rand.Intn(4) == 0 { + destructs[h] = struct{}{} + } + if rand.Intn(2) == 0 { accStorage := make(map[common.Hash][]byte) value := make([]byte, 32) rand.Read(value) @@ -48,20 +52,18 @@ func TestMergeBasics(t *testing.T) { } } // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) - child := newDiffLayer(parent, common.Hash{}, accounts, storage) - child = newDiffLayer(child, common.Hash{}, accounts, storage) - child = newDiffLayer(child, common.Hash{}, accounts, storage) - child = newDiffLayer(child, common.Hash{}, accounts, storage) + parent := 
newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage) + child := newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) + child = newDiffLayer(child, common.Hash{}, destructs, accounts, storage) + child = newDiffLayer(child, common.Hash{}, destructs, accounts, storage) + child = newDiffLayer(child, common.Hash{}, destructs, accounts, storage) // And flatten merged := (child.flatten()).(*diffLayer) { // Check account lists - // Should be zero/nil first if got, exp := len(merged.accountList), 0; got != exp { t.Errorf("accountList wrong, got %v exp %v", got, exp) } - // Then set when we call AccountList if got, exp := len(merged.AccountList()), len(accounts); got != exp { t.Errorf("AccountList() wrong, got %v exp %v", got, exp) } @@ -69,6 +71,11 @@ func TestMergeBasics(t *testing.T) { t.Errorf("accountList [2] wrong, got %v exp %v", got, exp) } } + { // Check account drops + if got, exp := len(merged.destructSet), len(destructs); got != exp { + t.Errorf("accountDrop wrong, got %v exp %v", got, exp) + } + } { // Check storage lists i := 0 for aHash, sMap := range storage { @@ -95,42 +102,61 @@ func TestMergeDelete(t *testing.T) { h1 := common.HexToHash("0x01") h2 := common.HexToHash("0x02") - flip := func() map[common.Hash][]byte { - accs := make(map[common.Hash][]byte) - accs[h1] = randomAccount() - accs[h2] = nil - return accs + flipDrops := func() map[common.Hash]struct{} { + return map[common.Hash]struct{}{ + h2: struct{}{}, + } } - flop := func() map[common.Hash][]byte { - accs := make(map[common.Hash][]byte) - accs[h1] = nil - accs[h2] = randomAccount() - return accs + flipAccs := func() map[common.Hash][]byte { + return map[common.Hash][]byte{ + h1: randomAccount(), + } } - - // Add some flip-flopping layers on top - parent := newDiffLayer(emptyLayer(), common.Hash{}, flip(), storage) - child := parent.Update(common.Hash{}, flop(), storage) - child = child.Update(common.Hash{}, flip(), storage) - child = child.Update(common.Hash{}, flop(), storage) - child = child.Update(common.Hash{}, flip(), storage) - child = child.Update(common.Hash{}, flop(), storage) - child = child.Update(common.Hash{}, flip(), storage) + flopDrops := func() map[common.Hash]struct{} { + return map[common.Hash]struct{}{ + h1: struct{}{}, + } + } + flopAccs := func() map[common.Hash][]byte { + return map[common.Hash][]byte{ + h2: randomAccount(), + } + } + // Add some flipAccs-flopping layers on top + parent := newDiffLayer(emptyLayer(), common.Hash{}, flipDrops(), flipAccs(), storage) + child := parent.Update(common.Hash{}, flopDrops(), flopAccs(), storage) + child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage) + child = child.Update(common.Hash{}, flopDrops(), flopAccs(), storage) + child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage) + child = child.Update(common.Hash{}, flopDrops(), flopAccs(), storage) + child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage) if data, _ := child.Account(h1); data == nil { - t.Errorf("last diff layer: expected %x to be non-nil", h1) + t.Errorf("last diff layer: expected %x account to be non-nil", h1) } if data, _ := child.Account(h2); data != nil { - t.Errorf("last diff layer: expected %x to be nil", h2) + t.Errorf("last diff layer: expected %x account to be nil", h2) + } + if _, ok := child.destructSet[h1]; ok { + t.Errorf("last diff layer: expected %x drop to be missing", h1) + } + if _, ok := child.destructSet[h2]; !ok { + t.Errorf("last diff layer: expected %x drop to be present", h1) } // And 
flatten merged := (child.flatten()).(*diffLayer) if data, _ := merged.Account(h1); data == nil { - t.Errorf("merged layer: expected %x to be non-nil", h1) + t.Errorf("merged layer: expected %x account to be non-nil", h1) } if data, _ := merged.Account(h2); data != nil { - t.Errorf("merged layer: expected %x to be nil", h2) + t.Errorf("merged layer: expected %x account to be nil", h2) + } + if _, ok := merged.destructSet[h1]; !ok { // Note, drops stay alive until persisted to disk! + t.Errorf("merged diff layer: expected %x drop to be present", h1) + } + if _, ok := merged.destructSet[h2]; !ok { // Note, drops stay alive until persisted to disk! + t.Errorf("merged diff layer: expected %x drop to be present", h1) } // If we add more granular metering of memory, we can enable this again, // but it's not implemented for now @@ -150,18 +176,23 @@ func TestInsertAndMerge(t *testing.T) { child *diffLayer ) { - var accounts = make(map[common.Hash][]byte) - var storage = make(map[common.Hash]map[common.Hash][]byte) - parent = newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + parent = newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage) } { - var accounts = make(map[common.Hash][]byte) - var storage = make(map[common.Hash]map[common.Hash][]byte) + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) accounts[acc] = randomAccount() - accstorage := make(map[common.Hash][]byte) - storage[acc] = accstorage + storage[acc] = make(map[common.Hash][]byte) storage[acc][slot] = []byte{0x01} - child = newDiffLayer(parent, common.Hash{}, accounts, storage) + child = newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) } // And flatten merged := (child.flatten()).(*diffLayer) @@ -189,20 +220,21 @@ func emptyLayer() *diskLayer { func BenchmarkSearch(b *testing.B) { // First, we set up 128 diff layers, with 1K items each fill := func(parent snapshot) *diffLayer { - accounts := make(map[common.Hash][]byte) - storage := make(map[common.Hash]map[common.Hash][]byte) - + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) for i := 0; i < 10000; i++ { accounts[randomHash()] = randomAccount() } - return newDiffLayer(parent, common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) } var layer snapshot layer = emptyLayer() for i := 0; i < 128; i++ { layer = fill(layer) } - key := crypto.Keccak256Hash([]byte{0x13, 0x38}) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -224,9 +256,12 @@ func BenchmarkSearchSlot(b *testing.B) { storageKey := crypto.Keccak256Hash([]byte{0x13, 0x37}) accountRLP := randomAccount() fill := func(parent snapshot) *diffLayer { - accounts := make(map[common.Hash][]byte) + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) accounts[accountKey] = accountRLP - storage := make(map[common.Hash]map[common.Hash][]byte) accStorage := make(map[common.Hash][]byte) for i := 0; i < 5; i++ { @@ -235,7 +270,7 @@ func BenchmarkSearchSlot(b *testing.B) { accStorage[randomHash()] = value storage[accountKey] = accStorage } - return newDiffLayer(parent, common.Hash{}, 
accounts, storage) + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) } var layer snapshot layer = emptyLayer() @@ -249,15 +284,17 @@ func BenchmarkSearchSlot(b *testing.B) { } // With accountList and sorting -//BenchmarkFlatten-6 50 29890856 ns/op +// BenchmarkFlatten-6 50 29890856 ns/op // // Without sorting and tracking accountlist // BenchmarkFlatten-6 300 5511511 ns/op func BenchmarkFlatten(b *testing.B) { fill := func(parent snapshot) *diffLayer { - accounts := make(map[common.Hash][]byte) - storage := make(map[common.Hash]map[common.Hash][]byte) - + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) for i := 0; i < 100; i++ { accountKey := randomHash() accounts[accountKey] = randomAccount() @@ -271,11 +308,9 @@ func BenchmarkFlatten(b *testing.B) { } storage[accountKey] = accStorage } - return newDiffLayer(parent, common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) } - b.ResetTimer() - for i := 0; i < b.N; i++ { b.StopTimer() var layer snapshot @@ -305,9 +340,11 @@ func BenchmarkFlatten(b *testing.B) { // BenchmarkJournal-6 1 1208083335 ns/op // bufio writer func BenchmarkJournal(b *testing.B) { fill := func(parent snapshot) *diffLayer { - accounts := make(map[common.Hash][]byte) - storage := make(map[common.Hash]map[common.Hash][]byte) - + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) for i := 0; i < 200; i++ { accountKey := randomHash() accounts[accountKey] = randomAccount() @@ -321,7 +358,7 @@ func BenchmarkJournal(b *testing.B) { } storage[accountKey] = accStorage } - return newDiffLayer(parent, common.Hash{}, accounts, storage) + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) } layer := snapshot(new(diskLayer)) for i := 1; i < 128; i++ { diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index 3266424a8..e8f2bc853 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -161,6 +161,6 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro // Update creates a new layer on top of the existing snapshot diff tree with // the specified data items. Note, the maps are retained by the method to avoid // copying everything. 
-func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { - return newDiffLayer(dl, blockHash, accounts, storage) +func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, blockHash, destructs, accounts, storage) } diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go index b8dded0d8..aae2aa6b5 100644 --- a/core/state/snapshot/disklayer_test.go +++ b/core/state/snapshot/disklayer_test.go @@ -116,13 +116,14 @@ func TestDiskMerge(t *testing.T) { base.Storage(conNukeCache, conNukeCacheSlot) // Modify or delete some accounts, flatten everything onto disk - if err := snaps.Update(diffRoot, baseRoot, map[common.Hash][]byte{ - accModNoCache: reverse(accModNoCache[:]), - accModCache: reverse(accModCache[:]), - accDelNoCache: nil, - accDelCache: nil, - conNukeNoCache: nil, - conNukeCache: nil, + if err := snaps.Update(diffRoot, baseRoot, map[common.Hash]struct{}{ + accDelNoCache: struct{}{}, + accDelCache: struct{}{}, + conNukeNoCache: struct{}{}, + conNukeCache: struct{}{}, + }, map[common.Hash][]byte{ + accModNoCache: reverse(accModNoCache[:]), + accModCache: reverse(accModCache[:]), }, map[common.Hash]map[common.Hash][]byte{ conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])}, conModCache: {conModCacheSlot: reverse(conModCacheSlot[:])}, @@ -338,13 +339,14 @@ func TestDiskPartialMerge(t *testing.T) { assertStorage(conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:]) // Modify or delete some accounts, flatten everything onto disk - if err := snaps.Update(diffRoot, baseRoot, map[common.Hash][]byte{ - accModNoCache: reverse(accModNoCache[:]), - accModCache: reverse(accModCache[:]), - accDelNoCache: nil, - accDelCache: nil, - conNukeNoCache: nil, - conNukeCache: nil, + if err := snaps.Update(diffRoot, baseRoot, map[common.Hash]struct{}{ + accDelNoCache: struct{}{}, + accDelCache: struct{}{}, + conNukeNoCache: struct{}{}, + conNukeCache: struct{}{}, + }, map[common.Hash][]byte{ + accModNoCache: reverse(accModNoCache[:]), + accModCache: reverse(accModCache[:]), }, map[common.Hash]map[common.Hash][]byte{ conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])}, conModCache: {conModCacheSlot: reverse(conModCacheSlot[:])}, diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index dbfafd73d..832be10a4 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -28,18 +28,23 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" ) -// TestIteratorBasics tests some simple single-layer iteration -func TestIteratorBasics(t *testing.T) { +// TestAccountIteratorBasics tests some simple single-layer iteration +func TestAccountIteratorBasics(t *testing.T) { var ( - accounts = make(map[common.Hash][]byte) - storage = make(map[common.Hash]map[common.Hash][]byte) + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) ) // Fill up a parent for i := 0; i < 100; i++ { h := randomHash() data := randomAccount() + accounts[h] = data - if rand.Intn(20) < 10 { + if rand.Intn(4) == 0 { + destructs[h] = struct{}{} + } + if rand.Intn(2) == 0 { accStorage := make(map[common.Hash][]byte) value := make([]byte, 32) rand.Read(value) @@ -48,7 +53,7 @@ func TestIteratorBasics(t 
*testing.T) { } } // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) + parent := newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage) it := parent.AccountIterator(common.Hash{}) verifyIterator(t, 100, it) } @@ -138,8 +143,8 @@ func verifyIterator(t *testing.T, expCount int, it AccountIterator) { } } -// TestIteratorTraversal tests some simple multi-layer iteration. -func TestIteratorTraversal(t *testing.T) { +// TestAccountIteratorTraversal tests some simple multi-layer iteration. +func TestAccountIteratorTraversal(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ diskdb: rawdb.NewMemoryDatabase(), @@ -152,13 +157,13 @@ func TestIteratorTraversal(t *testing.T) { }, } // Stack three diff layers on top with various overlaps - snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil) - snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, randomAccountSet("0xbb", "0xdd", "0xf0"), nil) - snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, randomAccountSet("0xcc", "0xf0", "0xff"), nil) // Verify the single and multi-layer iterators @@ -173,9 +178,9 @@ func TestIteratorTraversal(t *testing.T) { verifyIterator(t, 7, it) } -// TestIteratorTraversalValues tests some multi-layer iteration, where we +// TestAccountIteratorTraversalValues tests some multi-layer iteration, where we // also expect the correct values to show up. -func TestIteratorTraversalValues(t *testing.T) { +func TestAccountIteratorTraversalValues(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ diskdb: rawdb.NewMemoryDatabase(), @@ -223,14 +228,14 @@ func TestIteratorTraversalValues(t *testing.T) { } } // Assemble a stack of snapshots from the account layers - snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), a, nil) - snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), b, nil) - snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), c, nil) - snaps.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), d, nil) - snaps.Update(common.HexToHash("0x06"), common.HexToHash("0x05"), e, nil) - snaps.Update(common.HexToHash("0x07"), common.HexToHash("0x06"), f, nil) - snaps.Update(common.HexToHash("0x08"), common.HexToHash("0x07"), g, nil) - snaps.Update(common.HexToHash("0x09"), common.HexToHash("0x08"), h, nil) + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, a, nil) + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, b, nil) + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, c, nil) + snaps.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), nil, d, nil) + snaps.Update(common.HexToHash("0x06"), common.HexToHash("0x05"), nil, e, nil) + snaps.Update(common.HexToHash("0x07"), common.HexToHash("0x06"), nil, f, nil) + snaps.Update(common.HexToHash("0x08"), common.HexToHash("0x07"), nil, g, nil) + snaps.Update(common.HexToHash("0x09"), common.HexToHash("0x08"), nil, h, nil) it, _ := snaps.AccountIterator(common.HexToHash("0x09"), common.Hash{}) defer it.Release() @@ -249,7 +254,7 @@ func TestIteratorTraversalValues(t *testing.T) { } // This testcase is notorious, all 
layers contain the exact same 200 accounts. -func TestIteratorLargeTraversal(t *testing.T) { +func TestAccountIteratorLargeTraversal(t *testing.T) { // Create a custom account factory to recreate the same addresses makeAccounts := func(num int) map[common.Hash][]byte { accounts := make(map[common.Hash][]byte) @@ -272,7 +277,7 @@ func TestIteratorLargeTraversal(t *testing.T) { }, } for i := 1; i < 128; i++ { - snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), makeAccounts(200), nil) + snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), nil, makeAccounts(200), nil) } // Iterate the entire stack and ensure everything is hit only once head := snaps.Snapshot(common.HexToHash("0x80")) @@ -285,11 +290,11 @@ func TestIteratorLargeTraversal(t *testing.T) { verifyIterator(t, 200, it) } -// TestIteratorFlattening tests what happens when we +// TestAccountIteratorFlattening tests what happens when we // - have a live iterator on child C (parent C1 -> C2 .. CN) // - flattens C2 all the way into CN // - continues iterating -func TestIteratorFlattening(t *testing.T) { +func TestAccountIteratorFlattening(t *testing.T) { // Create an empty base layer and a snapshot tree out of it base := &diskLayer{ diskdb: rawdb.NewMemoryDatabase(), @@ -302,13 +307,13 @@ func TestIteratorFlattening(t *testing.T) { }, } // Create a stack of diffs on top - snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil) - snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, randomAccountSet("0xbb", "0xdd", "0xf0"), nil) - snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, randomAccountSet("0xcc", "0xf0", "0xff"), nil) // Create an iterator and flatten the data from underneath it @@ -321,7 +326,7 @@ func TestIteratorFlattening(t *testing.T) { //verifyIterator(t, 7, it) } -func TestIteratorSeek(t *testing.T) { +func TestAccountIteratorSeek(t *testing.T) { // Create a snapshot stack with some initial data base := &diskLayer{ diskdb: rawdb.NewMemoryDatabase(), @@ -333,13 +338,13 @@ func TestIteratorSeek(t *testing.T) { base.root: base, }, } - snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil) - snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, randomAccountSet("0xbb", "0xdd", "0xf0"), nil) - snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, randomAccountSet("0xcc", "0xf0", "0xff"), nil) // Construct various iterators and ensure their tranversal is correct @@ -372,18 +377,18 @@ func TestIteratorSeek(t *testing.T) { verifyIterator(t, 0, it) // expected: nothing } -// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the +// BenchmarkAccountIteratorTraversal is a bit a bit notorious -- all layers contain the // exact same 200 accounts. That means that we need to process 2000 items, but // only spit out 200 values eventually. 
// // The value-fetching benchmark is easy on the binary iterator, since it never has to reach // down at any depth for retrieving the values -- all are on the toppmost layer // -// BenchmarkIteratorTraversal/binary_iterator_keys-6 2239 483674 ns/op -// BenchmarkIteratorTraversal/binary_iterator_values-6 2403 501810 ns/op -// BenchmarkIteratorTraversal/fast_iterator_keys-6 1923 677966 ns/op -// BenchmarkIteratorTraversal/fast_iterator_values-6 1741 649967 ns/op -func BenchmarkIteratorTraversal(b *testing.B) { +// BenchmarkAccountIteratorTraversal/binary_iterator_keys-6 2239 483674 ns/op +// BenchmarkAccountIteratorTraversal/binary_iterator_values-6 2403 501810 ns/op +// BenchmarkAccountIteratorTraversal/fast_iterator_keys-6 1923 677966 ns/op +// BenchmarkAccountIteratorTraversal/fast_iterator_values-6 1741 649967 ns/op +func BenchmarkAccountIteratorTraversal(b *testing.B) { // Create a custom account factory to recreate the same addresses makeAccounts := func(num int) map[common.Hash][]byte { accounts := make(map[common.Hash][]byte) @@ -406,7 +411,7 @@ func BenchmarkIteratorTraversal(b *testing.B) { }, } for i := 1; i <= 100; i++ { - snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), makeAccounts(200), nil) + snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), nil, makeAccounts(200), nil) } // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. @@ -469,17 +474,17 @@ func BenchmarkIteratorTraversal(b *testing.B) { }) } -// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where +// BenchmarkAccountIteratorLargeBaselayer is a pretty realistic benchmark, where // the baselayer is a lot larger than the upper layer. 
// // This is heavy on the binary iterator, which in most cases will have to // call recursively 100 times for the majority of the values // -// BenchmarkIteratorLargeBaselayer/binary_iterator_(keys)-6 514 1971999 ns/op -// BenchmarkIteratorLargeBaselayer/binary_iterator_(values)-6 61 18997492 ns/op -// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op -// BenchmarkIteratorLargeBaselayer/fast_iterator_(values)-6 4047 296823 ns/op -func BenchmarkIteratorLargeBaselayer(b *testing.B) { +// BenchmarkAccountIteratorLargeBaselayer/binary_iterator_(keys)-6 514 1971999 ns/op +// BenchmarkAccountIteratorLargeBaselayer/binary_iterator_(values)-6 61 18997492 ns/op +// BenchmarkAccountIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op +// BenchmarkAccountIteratorLargeBaselayer/fast_iterator_(values)-6 4047 296823 ns/op +func BenchmarkAccountIteratorLargeBaselayer(b *testing.B) { // Create a custom account factory to recreate the same addresses makeAccounts := func(num int) map[common.Hash][]byte { accounts := make(map[common.Hash][]byte) @@ -501,9 +506,9 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { base.root: base, }, } - snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), makeAccounts(2000), nil) + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, makeAccounts(2000), nil) for i := 2; i <= 100; i++ { - snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), makeAccounts(20), nil) + snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), nil, makeAccounts(20), nil) } // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. @@ -590,7 +595,7 @@ func benchmarkAccountIteration(b *testing.B, iterator func(snap snapshot) Accoun } stack := snapshot(emptyLayer()) for _, layer := range layers { - stack = stack.Update(common.Hash{}, layer, nil) + stack = stack.Update(common.Hash{}, layer, nil, nil) } // Reset the timers and report all the stats it := iterator(stack) diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index c42a26d21..66c7aee0a 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -43,6 +43,11 @@ type journalGenerator struct { Storage uint64 } +// journalDestruct is an account deletion entry in a diffLayer's disk journal. +type journalDestruct struct { + Hash common.Hash +} + // journalAccount is an account entry in a diffLayer's disk journal. 
type journalAccount struct { Hash common.Hash @@ -139,6 +144,14 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { } return nil, fmt.Errorf("load diff root: %v", err) } + var destructs []journalDestruct + if err := r.Decode(&destructs); err != nil { + return nil, fmt.Errorf("load diff destructs: %v", err) + } + destructSet := make(map[common.Hash]struct{}) + for _, entry := range destructs { + destructSet[entry.Hash] = struct{}{} + } var accounts []journalAccount if err := r.Decode(&accounts); err != nil { return nil, fmt.Errorf("load diff accounts: %v", err) @@ -159,7 +172,7 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { } storageData[entry.Hash] = slots } - return loadDiffLayer(newDiffLayer(parent, root, accountData, storageData), r) + return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r) } // Journal writes the persistent layer generator stats into a buffer to be stored @@ -218,6 +231,13 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { if err := rlp.Encode(buffer, dl.root); err != nil { return common.Hash{}, err } + destructs := make([]journalDestruct, 0, len(dl.destructSet)) + for hash := range dl.destructSet { + destructs = append(destructs, journalDestruct{Hash: hash}) + } + if err := rlp.Encode(buffer, destructs); err != nil { + return common.Hash{}, err + } accounts := make([]journalAccount, 0, len(dl.accountData)) for hash, blob := range dl.accountData { accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index d031dd2c1..27a8c7f0b 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -125,7 +125,7 @@ type snapshot interface { // the specified data items. // // Note, the maps are retained by the method to avoid copying everything. - Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer + Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer // Journal commits an entire diff hierarchy to disk into a single journal entry. // This is meant to be used during shutdown to persist the snapshot without @@ -222,7 +222,7 @@ func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot { // Update adds a new snapshot into the tree, if that can be linked to an existing // old parent. It is disallowed to insert a disk layer (the origin of all). -func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { +func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { // Reject noop updates to avoid self-loops in the snapshot tree. This is a // special case that can only happen for Clique networks where empty blocks // don't modify the state (0 block subsidy). 
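For orientation, the five-argument Update introduced by this series is easiest to read from the caller's side: the destruct set is keyed by account hash and is passed before the updated account and storage maps, and all three maps are retained by the new diff layer rather than copied. A minimal caller-side sketch, assuming an already-initialised *snapshot.Tree; the helper name applyBlockDiff and its argument shapes are illustrative only and not part of the patch:

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/state/snapshot"
)

// applyBlockDiff shows the expected call order of the new Update: block root,
// parent root, destructed accounts, updated accounts, updated storage slots.
// The maps are retained by the layer, so callers must not reuse them afterwards.
func applyBlockDiff(snaps *snapshot.Tree, blockRoot, parentRoot common.Hash,
	deleted []common.Hash, accounts map[common.Hash][]byte,
	storage map[common.Hash]map[common.Hash][]byte) error {

	destructs := make(map[common.Hash]struct{}, len(deleted))
	for _, hash := range deleted {
		destructs[hash] = struct{}{}
	}
	return snaps.Update(blockRoot, parentRoot, destructs, accounts, storage)
}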
@@ -237,7 +237,7 @@ func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts ma if parent == nil { return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) } - snap := parent.Update(blockRoot, accounts, storage) + snap := parent.Update(blockRoot, destructs, accounts, storage) // Save the new snapshot for later t.lock.Lock() @@ -425,40 +425,43 @@ func diffToDisk(bottom *diffLayer) *diskLayer { base.stale = true base.lock.Unlock() - // Push all the accounts into the database + // Destroy all the destructed accounts from the database + for hash := range bottom.destructSet { + // Skip any account not covered yet by the snapshot + if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 { + continue + } + // Remove all storage slots + rawdb.DeleteAccountSnapshot(batch, hash) + base.cache.Set(hash[:], nil) + + it := rawdb.IterateStorageSnapshots(base.diskdb, hash) + for it.Next() { + if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator + batch.Delete(key) + base.cache.Del(key[1:]) + + snapshotFlushStorageItemMeter.Mark(1) + } + } + it.Release() + } + // Push all updated accounts into the database for hash, data := range bottom.accountData { // Skip any account not covered yet by the snapshot if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 { continue } - if len(data) > 0 { - // Account was updated, push to disk - rawdb.WriteAccountSnapshot(batch, hash, data) - base.cache.Set(hash[:], data) - snapshotCleanAccountWriteMeter.Mark(int64(len(data))) + // Push the account to disk + rawdb.WriteAccountSnapshot(batch, hash, data) + base.cache.Set(hash[:], data) + snapshotCleanAccountWriteMeter.Mark(int64(len(data))) - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - log.Crit("Failed to write account snapshot", "err", err) - } - batch.Reset() + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write account snapshot", "err", err) } - } else { - // Account was deleted, remove all storage slots too - rawdb.DeleteAccountSnapshot(batch, hash) - base.cache.Set(hash[:], nil) - - it := rawdb.IterateStorageSnapshots(base.diskdb, hash) - for it.Next() { - if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator - batch.Delete(key) - base.cache.Del(key[1:]) - - snapshotFlushStorageItemMeter.Mark(1) - snapshotFlushStorageSizeMeter.Mark(int64(len(data))) - } - } - it.Release() + batch.Reset() } snapshotFlushAccountItemMeter.Mark(1) snapshotFlushAccountSizeMeter.Mark(int64(len(data))) diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 2b1482817..910923841 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -81,7 +81,7 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 2 { @@ -91,7 +91,7 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) { if err := snaps.Cap(common.HexToHash("0x02"), 0); err != nil { t.Fatalf("failed to merge diff layer onto disk: %v", err) } - // Since the 
base layer was modified, ensure that data retrievald on the external reference fail + // Since the base layer was modified, ensure that data retrieval on the external reference fail if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) } @@ -125,10 +125,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 3 { @@ -173,10 +173,10 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 3 { @@ -220,13 +220,13 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { accounts := map[common.Hash][]byte{ common.HexToHash("0xa1"): randomAccount(), } - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } - if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), accounts, nil); err != nil { + if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, accounts, nil); err != nil { t.Fatalf("failed to create a diff layer: %v", err) } if n := len(snaps.layers); n != 4 { @@ -280,12 +280,12 @@ func TestPostCapBasicDataAccess(t *testing.T) { }, } // The lowest difflayer - snaps.Update(common.HexToHash("0xa1"), common.HexToHash("0x01"), setAccount("0xa1"), nil) - snaps.Update(common.HexToHash("0xa2"), common.HexToHash("0xa1"), setAccount("0xa2"), nil) - snaps.Update(common.HexToHash("0xb2"), common.HexToHash("0xa1"), setAccount("0xb2"), nil) + snaps.Update(common.HexToHash("0xa1"), common.HexToHash("0x01"), nil, setAccount("0xa1"), nil) + snaps.Update(common.HexToHash("0xa2"), common.HexToHash("0xa1"), nil, setAccount("0xa2"), nil) + snaps.Update(common.HexToHash("0xb2"), common.HexToHash("0xa1"), nil, setAccount("0xb2"), nil) - snaps.Update(common.HexToHash("0xa3"), 
common.HexToHash("0xa2"), setAccount("0xa3"), nil) - snaps.Update(common.HexToHash("0xb3"), common.HexToHash("0xb2"), setAccount("0xb3"), nil) + snaps.Update(common.HexToHash("0xa3"), common.HexToHash("0xa2"), nil, setAccount("0xa3"), nil) + snaps.Update(common.HexToHash("0xb3"), common.HexToHash("0xb2"), nil, setAccount("0xb3"), nil) // checkExist verifies if an account exiss in a snapshot checkExist := func(layer *diffLayer, key string) error { diff --git a/core/state/statedb.go b/core/state/statedb.go index d4a91ee71..55e9752fa 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -67,10 +67,11 @@ type StateDB struct { db Database trie Trie - snaps *snapshot.Tree - snap snapshot.Snapshot - snapAccounts map[common.Hash][]byte - snapStorage map[common.Hash]map[common.Hash][]byte + snaps *snapshot.Tree + snap snapshot.Snapshot + snapDestructs map[common.Hash]struct{} + snapAccounts map[common.Hash][]byte + snapStorage map[common.Hash]map[common.Hash][]byte // This map holds 'live' objects, which will get modified while processing a state transition. stateObjects map[common.Address]*stateObject @@ -133,6 +134,7 @@ func New(root common.Hash, db Database, snaps *snapshot.Tree) (*StateDB, error) } if sdb.snaps != nil { if sdb.snap = sdb.snaps.Snapshot(root); sdb.snap != nil { + sdb.snapDestructs = make(map[common.Hash]struct{}) sdb.snapAccounts = make(map[common.Hash][]byte) sdb.snapStorage = make(map[common.Hash]map[common.Hash][]byte) } @@ -171,8 +173,9 @@ func (s *StateDB) Reset(root common.Hash) error { s.clearJournalAndRefund() if s.snaps != nil { - s.snapAccounts, s.snapStorage = nil, nil + s.snapAccounts, s.snapDestructs, s.snapStorage = nil, nil, nil if s.snap = s.snaps.Snapshot(root); s.snap != nil { + s.snapDestructs = make(map[common.Hash]struct{}) s.snapAccounts = make(map[common.Hash][]byte) s.snapStorage = make(map[common.Hash]map[common.Hash][]byte) } @@ -463,15 +466,6 @@ func (s *StateDB) updateStateObject(obj *stateObject) { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) } s.setError(s.trie.TryUpdate(addr[:], data)) - - // If state snapshotting is active, cache the data til commit - if s.snap != nil { - // If the account is an empty resurrection, unmark the storage nil-ness - if storage, ok := s.snapStorage[obj.addrHash]; storage == nil && ok { - delete(s.snapStorage, obj.addrHash) - } - s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) - } } // deleteStateObject removes the given object from the state trie. @@ -483,12 +477,6 @@ func (s *StateDB) deleteStateObject(obj *stateObject) { // Delete the account from the trie addr := obj.Address() s.setError(s.trie.TryDelete(addr[:])) - - // If state snapshotting is active, cache the data til commit - if s.snap != nil { - s.snapAccounts[obj.addrHash] = nil // We need to maintain account deletions explicitly - s.snapStorage[obj.addrHash] = nil // We need to maintain storage deletions explicitly - } } // getStateObject retrieves a state object given by the address, returning nil if @@ -737,8 +725,23 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { } if obj.suicided || (deleteEmptyObjects && obj.empty()) { obj.deleted = true + + // If state snapshotting is active, also mark the destruction there. + // Note, we can't do this only at the end of a block because multiple + // transactions within the same block might self destruct and then + // ressurrect an account and the snapshotter needs both events. 
+ if s.snap != nil { + s.snapDestructs[obj.addrHash] = struct{}{} // We need to maintain account deletions explicitly (will remain set indefinitely) + delete(s.snapAccounts, obj.addrHash) // Clear out any previously updated account data (may be recreated via a ressurrect) + delete(s.snapStorage, obj.addrHash) // Clear out any previously updated storage data (may be recreated via a ressurrect) + } } else { obj.finalise() + + // If state snapshotting is active, cache the data til commit + if s.snap != nil { + s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + } } s.stateObjectsPending[addr] = struct{}{} s.stateObjectsDirty[addr] = struct{}{} @@ -842,7 +845,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } // Only update if there's a state transition (skip empty Clique blocks) if parent := s.snap.Root(); parent != root { - if err := s.snaps.Update(root, parent, s.snapAccounts, s.snapStorage); err != nil { + if err := s.snaps.Update(root, parent, s.snapDestructs, s.snapAccounts, s.snapStorage); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } if err := s.snaps.Cap(root, 127); err != nil { // Persistent layer is 128th, the last available trie diff --git a/core/vm/opcodes.go b/core/vm/opcodes.go index ba0ba01b8..71ef0724a 100644 --- a/core/vm/opcodes.go +++ b/core/vm/opcodes.go @@ -70,7 +70,7 @@ const ( SHR SAR - SHA3 = 0x20 + SHA3 OpCode = 0x20 ) // 0x30 range - closure state. @@ -101,8 +101,8 @@ const ( NUMBER DIFFICULTY GASLIMIT - CHAINID = 0x46 - SELFBALANCE = 0x47 + CHAINID OpCode = 0x46 + SELFBALANCE OpCode = 0x47 ) // 0x50 range - 'storage' and execution. @@ -213,10 +213,9 @@ const ( RETURN DELEGATECALL CREATE2 - STATICCALL = 0xfa - - REVERT = 0xfd - SELFDESTRUCT = 0xff + STATICCALL OpCode = 0xfa + REVERT OpCode = 0xfd + SELFDESTRUCT OpCode = 0xff ) // Since the opcodes aren't all in order we can't use a regular slice. From dcb22a9f99b19bb6b6d27cdba754ad740dc426c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 3 Mar 2020 16:55:06 +0200 Subject: [PATCH 23/28] core/state: fix account root hash update point --- core/state/statedb.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/core/state/statedb.go b/core/state/statedb.go index 55e9752fa..ff2c6dac2 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -466,6 +466,14 @@ func (s *StateDB) updateStateObject(obj *stateObject) { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) } s.setError(s.trie.TryUpdate(addr[:], data)) + + // If state snapshotting is active, cache the data til commit. Note, this + // update mechanism is not symmetric to the deletion, because whereas it is + // enough to track account updates at commit time, deletions need tracking + // at transaction boundary level to ensure we capture state clearing. + if s.snap != nil { + s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + } } // deleteStateObject removes the given object from the state trie. @@ -729,7 +737,7 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { // If state snapshotting is active, also mark the destruction there. // Note, we can't do this only at the end of a block because multiple // transactions within the same block might self destruct and then - // ressurrect an account and the snapshotter needs both events. 
+ // ressurrect an account; but the snapshotter needs both events. if s.snap != nil { s.snapDestructs[obj.addrHash] = struct{}{} // We need to maintain account deletions explicitly (will remain set indefinitely) delete(s.snapAccounts, obj.addrHash) // Clear out any previously updated account data (may be recreated via a ressurrect) @@ -737,11 +745,6 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { } } else { obj.finalise() - - // If state snapshotting is active, cache the data til commit - if s.snap != nil { - s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) - } } s.stateObjectsPending[addr] = struct{}{} s.stateObjectsDirty[addr] = struct{}{} From 328de180a7172d2fc2894be11ad10548ef9d27e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Wed, 4 Mar 2020 10:19:53 +0200 Subject: [PATCH 24/28] core/state: fix resurrection state clearing and access --- core/state/state_object.go | 9 +++++++++ core/state/statedb.go | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/core/state/state_object.go b/core/state/state_object.go index 26e0b08f5..0833f2b0a 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -204,6 +204,15 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if metrics.EnabledExpensive { defer func(start time.Time) { s.db.SnapshotStorageReads += time.Since(start) }(time.Now()) } + // If the object was destructed in *this* block (and potentially resurrected), + // the storage has been cleared out, and we should *not* consult the previous + // snapshot about any storage values. The only possible alternatives are: + // 1) resurrect happened, and new slot values were set -- those should + // have been handles via pendingStorage above. 
+ // 2) we don't have new values, and can deliver empty response back + if _, destructed := s.db.snapDestructs[s.addrHash]; destructed { + return common.Hash{} + } enc, err = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key[:])) } // If snapshot unavailable or reading from it failed, load from the database diff --git a/core/state/statedb.go b/core/state/statedb.go index ff2c6dac2..038005685 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -595,6 +595,9 @@ func (s *StateDB) CreateAccount(addr common.Address) { if prev != nil { newObj.setBalance(prev.data.Balance) } + if s.snap != nil && prev != nil { + s.snapDestructs[prev.addrHash] = struct{}{} + } } func (db *StateDB) ForEachStorage(addr common.Address, cb func(key, value common.Hash) bool) error { @@ -855,7 +858,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { log.Warn("Failed to cap snapshot tree", "root", root, "layers", 127, "err", err) } } - s.snap, s.snapAccounts, s.snapStorage = nil, nil, nil + s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil } return root, err } From eff7cfbb03b74816513d415c0ecfe93ae83f4096 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 4 Mar 2020 13:38:55 +0100 Subject: [PATCH 25/28] core/state/snapshot: handle deleted accounts in fast iterator --- core/state/snapshot/iterator_fast.go | 34 +++++++++++++----- core/state/snapshot/iterator_test.go | 53 +++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index ef0212ac2..99734ec91 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -164,17 +164,35 @@ func (fi *fastAccountIterator) Next() bool { fi.curAccount = fi.iterators[0].it.Account() if innerErr := fi.iterators[0].it.Error(); innerErr != nil { fi.fail = innerErr + return false } - return fi.Error() == nil + if fi.curAccount != nil { + return true + } + // Implicit else: we've hit a nil-account, and need to fall through to the + // loop below to land on something non-nil } - if !fi.next(0) { - return false + // If an account is deleted in one of the layers, the key will still be there, + // but the actual value will be nil. 
However, the iterator should not + // export nil-values (but instead simply omit the key), so we need to loop + // here until we either + // - get a non-nil value, + // - hit an error, + // - or exhaust the iterator + for { + if !fi.next(0) { + return false // exhausted + } + fi.curAccount = fi.iterators[0].it.Account() + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + return false // error + } + if fi.curAccount != nil { + break // non-nil value found + } } - fi.curAccount = fi.iterators[0].it.Account() - if innerErr := fi.iterators[0].it.Error(); innerErr != nil { - fi.fail = innerErr - } - return fi.Error() == nil + return true } // next handles the next operation internally and should be invoked when we know diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index 832be10a4..935fafc2f 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -130,9 +130,13 @@ func verifyIterator(t *testing.T, expCount int, it AccountIterator) { last = common.Hash{} ) for it.Next() { - if hash := it.Hash(); bytes.Compare(last[:], hash[:]) >= 0 { + hash := it.Hash() + if bytes.Compare(last[:], hash[:]) >= 0 { t.Errorf("wrong order: %x >= %x", last, hash) } + if it.Account() == nil { + t.Errorf("iterator returned nil-value for hash %x", hash) + } count++ } if count != expCount { @@ -377,6 +381,53 @@ func TestAccountIteratorSeek(t *testing.T) { verifyIterator(t, 0, it) // expected: nothing } +// TestIteratorDeletions tests that the iterator behaves correct when there are +// deleted accounts (where the Account() value is nil). The iterator +// should not output any accounts or nil-values for those cases. +func TestIteratorDeletions(t *testing.T) { + // Create an empty base layer and a snapshot tree out of it + base := &diskLayer{ + diskdb: rawdb.NewMemoryDatabase(), + root: common.HexToHash("0x01"), + cache: fastcache.New(1024 * 500), + } + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + base.root: base, + }, + } + // Stack three diff layers on top with various overlaps + snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), + randomAccountSet("0x11", "0x22", "0x33"), nil) + + set := randomAccountSet("0x11", "0x22", "0x33") + deleted := common.HexToHash("0x22") + set[deleted] = nil + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), set, nil) + + snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), + randomAccountSet("0x33", "0x44", "0x55"), nil) + + // The output should be 11,33,44,55 + it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{}) + // Do a quick check + verifyIterator(t, 4, it) + it.Release() + + // And a more detailed verification that we indeed do not see '0x22' + it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{}) + defer it.Release() + for it.Next() { + hash := it.Hash() + if it.Account() == nil { + t.Errorf("iterator returned nil-value for hash %x", hash) + } + if hash == deleted { + t.Errorf("expected deleted elem %x to not be returned by iterator", deleted) + } + } +} + // BenchmarkAccountIteratorTraversal is a bit a bit notorious -- all layers contain the // exact same 200 accounts. That means that we need to process 2000 items, but // only spit out 200 values eventually. 
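The consumer-facing effect of the fast-iterator fix above is that an AccountIterator now omits keys whose value is nil (deleted accounts), so Account() is non-nil for every yielded hash. A small hypothetical helper, written only against the exported calls the tests in this patch exercise (AccountIterator, Next, Hash, Account, Release), to illustrate the contract:

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/state/snapshot"
)

// collectAccounts drains a snapshot account iterator into a map. After this
// patch the iterator skips deleted accounts entirely, so no nil check is
// needed on the returned blobs.
func collectAccounts(snaps *snapshot.Tree, root common.Hash) (map[common.Hash][]byte, error) {
	it, err := snaps.AccountIterator(root, common.Hash{})
	if err != nil {
		return nil, err
	}
	defer it.Release()

	accounts := make(map[common.Hash][]byte)
	for it.Next() {
		accounts[it.Hash()] = it.Account() // non-nil for every yielded hash
	}
	return accounts, nil
}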
From bc5d742c664245879ee80ecd968009b56f3e758c Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 4 Mar 2020 13:39:27 +0100 Subject: [PATCH 26/28] core: more blockchain tests --- core/blockchain_test.go | 207 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 201 insertions(+), 6 deletions(-) diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 5e2a21023..b6b497ece 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -2376,11 +2376,9 @@ func TestDeleteRecreateSlots(t *testing.T) { engine = ethash.NewFaker() db = rawdb.NewMemoryDatabase() // A sender who makes transactions, has some funds - key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") - address = crypto.PubkeyToAddress(key.PublicKey) - funds = big.NewInt(1000000000) - - aa = common.HexToAddress("0x7217d81b76bdd8707601e959454e3d776aee5f43") + key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + address = crypto.PubkeyToAddress(key.PublicKey) + funds = big.NewInt(1000000000) bb = common.HexToAddress("0x000000000000000000000000000000000000bbbb") aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA aaCode = []byte{byte(vm.PC), byte(vm.SELFDESTRUCT)} // Code for AA (simple selfdestruct) @@ -2403,7 +2401,7 @@ func TestDeleteRecreateSlots(t *testing.T) { byte(vm.PUSH1), 0x4, // location byte(vm.SSTORE), // Set slot[4] = 1 // Slots are set, now return the code - byte(vm.PUSH2), 0x88, 0xff, // Push code on stack + byte(vm.PUSH2), byte(vm.PC), byte(vm.SELFDESTRUCT), // Push code on stack byte(vm.PUSH1), 0x0, // memory start on stack byte(vm.MSTORE), // Code is now in memory. @@ -2428,6 +2426,10 @@ func TestDeleteRecreateSlots(t *testing.T) { byte(vm.CREATE2), }...) + initHash := crypto.Keccak256Hash(initCode) + aa := crypto.CreateAddress2(bb, [32]byte{}, initHash[:]) + t.Logf("Destination address: %x\n", aa) + gspec := &Genesis{ Config: params.TestChainConfig, Alloc: GenesisAlloc{ @@ -2563,3 +2565,196 @@ func TestDeleteRecreateAccount(t *testing.T) { t.Errorf("got %x exp %x", got, exp) } } + +// TestDeleteRecreateSlotsAcrossManyBlocks tests multiple state-transition that contains both deletion +// and recreation of contract state. +// Contract A exists, has slots 1 and 2 set +// Tx 1: Selfdestruct A +// Tx 2: Re-create A, set slots 3 and 4 +// Expected outcome is that _all_ slots are cleared from A, due to the selfdestruct, +// and then the new slots exist +func TestDeleteRecreateSlotsAcrossManyBlocks(t *testing.T) { + var ( + // Generate a canonical chain to act as the main dataset + engine = ethash.NewFaker() + db = rawdb.NewMemoryDatabase() + // A sender who makes transactions, has some funds + key, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + address = crypto.PubkeyToAddress(key.PublicKey) + funds = big.NewInt(1000000000) + bb = common.HexToAddress("0x000000000000000000000000000000000000bbbb") + aaStorage = make(map[common.Hash]common.Hash) // Initial storage in AA + aaCode = []byte{byte(vm.PC), byte(vm.SELFDESTRUCT)} // Code for AA (simple selfdestruct) + ) + // Populate two slots + aaStorage[common.HexToHash("01")] = common.HexToHash("01") + aaStorage[common.HexToHash("02")] = common.HexToHash("02") + + // The bb-code needs to CREATE2 the aa contract. It consists of + // both initcode and deployment code + // initcode: + // 1. Set slots 3=blocknum+1, 4=4, + // 2. 
Return aaCode + + initCode := []byte{ + byte(vm.PUSH1), 0x1, // + byte(vm.NUMBER), // value = number + 1 + byte(vm.ADD), // + byte(vm.PUSH1), 0x3, // location + byte(vm.SSTORE), // Set slot[3] = number + 1 + byte(vm.PUSH1), 0x4, // value + byte(vm.PUSH1), 0x4, // location + byte(vm.SSTORE), // Set slot[4] = 4 + // Slots are set, now return the code + byte(vm.PUSH2), byte(vm.PC), byte(vm.SELFDESTRUCT), // Push code on stack + byte(vm.PUSH1), 0x0, // memory start on stack + byte(vm.MSTORE), + // Code is now in memory. + byte(vm.PUSH1), 0x2, // size + byte(vm.PUSH1), byte(32 - 2), // offset + byte(vm.RETURN), + } + if l := len(initCode); l > 32 { + t.Fatalf("init code is too long for a pushx, need a more elaborate deployer") + } + bbCode := []byte{ + // Push initcode onto stack + byte(vm.PUSH1) + byte(len(initCode)-1)} + bbCode = append(bbCode, initCode...) + bbCode = append(bbCode, []byte{ + byte(vm.PUSH1), 0x0, // memory start on stack + byte(vm.MSTORE), + byte(vm.PUSH1), 0x00, // salt + byte(vm.PUSH1), byte(len(initCode)), // size + byte(vm.PUSH1), byte(32 - len(initCode)), // offset + byte(vm.PUSH1), 0x00, // endowment + byte(vm.CREATE2), + }...) + + initHash := crypto.Keccak256Hash(initCode) + aa := crypto.CreateAddress2(bb, [32]byte{}, initHash[:]) + t.Logf("Destination address: %x\n", aa) + gspec := &Genesis{ + Config: params.TestChainConfig, + Alloc: GenesisAlloc{ + address: {Balance: funds}, + // The address 0xAAAAA selfdestructs if called + aa: { + // Code needs to just selfdestruct + Code: aaCode, + Nonce: 1, + Balance: big.NewInt(0), + Storage: aaStorage, + }, + // The contract BB recreates AA + bb: { + Code: bbCode, + Balance: big.NewInt(1), + }, + }, + } + genesis := gspec.MustCommit(db) + var nonce uint64 + + type expectation struct { + exist bool + blocknum int + values map[int]int + } + var current = &expectation{ + exist: true, // exists in genesis + blocknum: 0, + values: map[int]int{1: 1, 2: 2}, + } + var expectations []*expectation + var newDestruct = func(e *expectation) *types.Transaction { + tx, _ := types.SignTx(types.NewTransaction(nonce, aa, + big.NewInt(0), 50000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + nonce++ + if e.exist { + e.exist = false + e.values = nil + } + t.Logf("block %d; adding destruct\n", e.blocknum) + return tx + } + var newResurrect = func(e *expectation) *types.Transaction { + tx, _ := types.SignTx(types.NewTransaction(nonce, bb, + big.NewInt(0), 100000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + nonce++ + if !e.exist { + e.exist = true + e.values = map[int]int{3: e.blocknum + 1, 4: 4} + } + t.Logf("block %d; adding resurrect\n", e.blocknum) + return tx + } + + blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 150, func(i int, b *BlockGen) { + var exp = new(expectation) + exp.blocknum = i + 1 + exp.values = make(map[int]int) + for k, v := range current.values { + exp.values[k] = v + } + exp.exist = current.exist + + b.SetCoinbase(common.Address{1}) + if i%2 == 0 { + b.AddTx(newDestruct(exp)) + } + if i%3 == 0 { + b.AddTx(newResurrect(exp)) + } + if i%5 == 0 { + b.AddTx(newDestruct(exp)) + } + if i%7 == 0 { + b.AddTx(newResurrect(exp)) + } + expectations = append(expectations, exp) + current = exp + }) + // Import the canonical chain + diskdb := rawdb.NewMemoryDatabase() + gspec.MustCommit(diskdb) + chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{ + //Debug: true, + //Tracer: vm.NewJSONLogger(nil, os.Stdout), + }, nil) + if err != nil { + t.Fatalf("failed to create 
tester chain: %v", err) + } + var asHash = func(num int) common.Hash { + return common.BytesToHash([]byte{byte(num)}) + } + for i, block := range blocks { + blockNum := i + 1 + if n, err := chain.InsertChain([]*types.Block{block}); err != nil { + t.Fatalf("block %d: failed to insert into chain: %v", n, err) + } + statedb, _ := chain.State() + // If all is correct, then slot 1 and 2 are zero + if got, exp := statedb.GetState(aa, common.HexToHash("01")), (common.Hash{}); got != exp { + t.Errorf("block %d, got %x exp %x", blockNum, got, exp) + } + if got, exp := statedb.GetState(aa, common.HexToHash("02")), (common.Hash{}); got != exp { + t.Errorf("block %d, got %x exp %x", blockNum, got, exp) + } + exp := expectations[i] + if exp.exist { + if !statedb.Exist(aa) { + t.Fatalf("block %d, expected %v to exist, it did not", blockNum, aa) + } + for slot, val := range exp.values { + if gotValue, expValue := statedb.GetState(aa, asHash(slot)), asHash(val); gotValue != expValue { + t.Fatalf("block %d, slot %d, got %x exp %x", blockNum, slot, gotValue, expValue) + } + } + } else { + if statedb.Exist(aa) { + t.Fatalf("block %d, expected %v to not exist, it did", blockNum, aa) + } + } + } +} From fab0ee3bfad9c685064dc12a210479a196e1cb3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Wed, 4 Mar 2020 15:06:04 +0200 Subject: [PATCH 27/28] core/state/snapshot: fix various iteration issues due to destruct set --- core/state/snapshot/difflayer.go | 8 +-- core/state/snapshot/difflayer_test.go | 75 ++++++++++++++++++--------- core/state/snapshot/iterator.go | 9 ++-- core/state/snapshot/iterator_test.go | 14 ++--- 4 files changed, 67 insertions(+), 39 deletions(-) diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 0915fb6bc..86ca5c8ba 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -493,12 +493,14 @@ func (dl *diffLayer) AccountList() []common.Hash { defer dl.lock.Unlock() dl.accountList = make([]common.Hash, 0, len(dl.destructSet)+len(dl.accountData)) - for hash := range dl.destructSet { - dl.accountList = append(dl.accountList, hash) - } for hash := range dl.accountData { dl.accountList = append(dl.accountList, hash) } + for hash := range dl.destructSet { + if _, ok := dl.accountData[hash]; !ok { + dl.accountList = append(dl.accountList, hash) + } + } sort.Sort(hashes(dl.accountList)) return dl.accountList } diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 61d2ed9c0..329e0eb8e 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -27,6 +27,33 @@ import ( "github.com/ethereum/go-ethereum/ethdb/memorydb" ) +func copyDestructs(destructs map[common.Hash]struct{}) map[common.Hash]struct{} { + copy := make(map[common.Hash]struct{}) + for hash := range destructs { + copy[hash] = struct{}{} + } + return copy +} + +func copyAccounts(accounts map[common.Hash][]byte) map[common.Hash][]byte { + copy := make(map[common.Hash][]byte) + for hash, blob := range accounts { + copy[hash] = blob + } + return copy +} + +func copyStorage(storage map[common.Hash]map[common.Hash][]byte) map[common.Hash]map[common.Hash][]byte { + copy := make(map[common.Hash]map[common.Hash][]byte) + for accHash, slots := range storage { + copy[accHash] = make(map[common.Hash][]byte) + for slotHash, blob := range slots { + copy[accHash][slotHash] = blob + } + } + return copy +} + // TestMergeBasics tests some simple merges func TestMergeBasics(t *testing.T) { 
var ( @@ -52,41 +79,41 @@ func TestMergeBasics(t *testing.T) { } } // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage) - child := newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) - child = newDiffLayer(child, common.Hash{}, destructs, accounts, storage) - child = newDiffLayer(child, common.Hash{}, destructs, accounts, storage) - child = newDiffLayer(child, common.Hash{}, destructs, accounts, storage) + parent := newDiffLayer(emptyLayer(), common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child := newDiffLayer(parent, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) // And flatten merged := (child.flatten()).(*diffLayer) { // Check account lists - if got, exp := len(merged.accountList), 0; got != exp { - t.Errorf("accountList wrong, got %v exp %v", got, exp) + if have, want := len(merged.accountList), 0; have != want { + t.Errorf("accountList wrong: have %v, want %v", have, want) } - if got, exp := len(merged.AccountList()), len(accounts); got != exp { - t.Errorf("AccountList() wrong, got %v exp %v", got, exp) + if have, want := len(merged.AccountList()), len(accounts); have != want { + t.Errorf("AccountList() wrong: have %v, want %v", have, want) } - if got, exp := len(merged.accountList), len(accounts); got != exp { - t.Errorf("accountList [2] wrong, got %v exp %v", got, exp) + if have, want := len(merged.accountList), len(accounts); have != want { + t.Errorf("accountList [2] wrong: have %v, want %v", have, want) } } { // Check account drops - if got, exp := len(merged.destructSet), len(destructs); got != exp { - t.Errorf("accountDrop wrong, got %v exp %v", got, exp) + if have, want := len(merged.destructSet), len(destructs); have != want { + t.Errorf("accountDrop wrong: have %v, want %v", have, want) } } { // Check storage lists i := 0 for aHash, sMap := range storage { - if got, exp := len(merged.storageList), i; got != exp { - t.Errorf("[1] storageList wrong, got %v exp %v", got, exp) + if have, want := len(merged.storageList), i; have != want { + t.Errorf("[1] storageList wrong: have %v, want %v", have, want) } - if got, exp := len(merged.StorageList(aHash)), len(sMap); got != exp { - t.Errorf("[2] StorageList() wrong, got %v exp %v", got, exp) + if have, want := len(merged.StorageList(aHash)), len(sMap); have != want { + t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want) } - if got, exp := len(merged.storageList[aHash]), len(sMap); got != exp { - t.Errorf("storageList wrong, got %v exp %v", got, exp) + if have, want := len(merged.storageList[aHash]), len(sMap); have != want { + t.Errorf("storageList wrong: have %v, want %v", have, want) } i++ } @@ -160,8 +187,8 @@ func TestMergeDelete(t *testing.T) { } // If we add more granular metering of memory, we can enable this again, // but it's not implemented for now - //if got, exp := merged.memory, child.memory; got != exp { - // t.Errorf("mem wrong, got %d, exp %d", got, exp) + //if have, want := merged.memory, child.memory; have != want { + // t.Errorf("mem wrong: have %d, want %d", have, want) //} } @@ -197,9 +224,9 
@@ func TestInsertAndMerge(t *testing.T) { // And flatten merged := (child.flatten()).(*diffLayer) { // Check that slot value is present - got, _ := merged.Storage(acc, slot) - if exp := []byte{0x01}; !bytes.Equal(got, exp) { - t.Errorf("merged slot value wrong, got %x, exp %x", got, exp) + have, _ := merged.Storage(acc, slot) + if want := []byte{0x01}; !bytes.Equal(have, want) { + t.Errorf("merged slot value wrong: have %x, want %x", have, want) } } } diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go index 774e9f554..f0b1eafa9 100644 --- a/core/state/snapshot/iterator.go +++ b/core/state/snapshot/iterator.go @@ -60,12 +60,6 @@ type diffAccountIterator struct { // hash as long as the iterator is not touched any more. curHash common.Hash - // curAccount is the current value the iterator is positioned on. The field - // is explicitly tracked since the referenced diff layer might go stale after - // the iterator was positioned and we don't want to fail accessing the old - // value as long as the iterator is not touched any more. - //curAccount []byte - layer *diffLayer // Live layer to retrieve values from keys []common.Hash // Keys left in the layer to iterate fail error // Any failures encountered (stale) @@ -130,6 +124,9 @@ func (it *diffAccountIterator) Account() []byte { it.layer.lock.RLock() blob, ok := it.layer.accountData[it.curHash] if !ok { + if _, ok := it.layer.destructSet[it.curHash]; ok { + return nil + } panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash)) } it.layer.lock.RUnlock() diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index 935fafc2f..5468a9a58 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -53,7 +53,7 @@ func TestAccountIteratorBasics(t *testing.T) { } } // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage) + parent := newDiffLayer(emptyLayer(), common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) it := parent.AccountIterator(common.Hash{}) verifyIterator(t, 100, it) } @@ -398,15 +398,17 @@ func TestIteratorDeletions(t *testing.T) { } // Stack three diff layers on top with various overlaps snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), - randomAccountSet("0x11", "0x22", "0x33"), nil) + nil, randomAccountSet("0x11", "0x22", "0x33"), nil) - set := randomAccountSet("0x11", "0x22", "0x33") deleted := common.HexToHash("0x22") - set[deleted] = nil - snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), set, nil) + destructed := map[common.Hash]struct{}{ + deleted: struct{}{}, + } + snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), + destructed, randomAccountSet("0x11", "0x33"), nil) snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), - randomAccountSet("0x33", "0x44", "0x55"), nil) + nil, randomAccountSet("0x33", "0x44", "0x55"), nil) // The output should be 11,33,44,55 it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{}) From 074efe6c8dfe879adb26f60a8a9554fdf9da1907 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 6 Mar 2020 13:05:44 +0100 Subject: [PATCH 28/28] core: fix two snapshot iterator flaws, decollide snap storage prefix * core/state/snapshot/iterator: fix two disk iterator flaws * core/rawdb: change SnapshotStoragePrefix to avoid prefix collision with preimagePrefix --- core/blockchain_test.go | 123 ++++++++++++++++++++++++++++++++ 
 core/rawdb/schema.go | 2 +-
 core/state/journal.go | 6 +-
 core/state/snapshot/iterator.go | 18 +++--
 core/state/statedb.go | 12 ++--
 5 files changed, 149 insertions(+), 12 deletions(-)

diff --git a/core/blockchain_test.go b/core/blockchain_test.go
index b6b497ece..f15ede449 100644
--- a/core/blockchain_test.go
+++ b/core/blockchain_test.go
@@ -2758,3 +2758,126 @@ func TestDeleteRecreateSlotsAcrossManyBlocks(t *testing.T) {
 		}
 	}
 }
+
+// TestInitThenFailCreateContract tests a pretty notorious case that happened
+// on mainnet over blocks 7338108, 7338110 and 7338115.
+// - Block 7338108: address e771789f5cccac282f23bb7add5690e1f6ca467c is funded
+//   with 0.001 ether (thus created, but with no code)
+// - Block 7338110: a CREATE2 is attempted. The CREATE2 would deploy code on
+//   the same address e771789f5cccac282f23bb7add5690e1f6ca467c. However, the
+//   deployment fails due to OOG during initcode execution
+// - Block 7338115: another tx checks the balance of
+//   e771789f5cccac282f23bb7add5690e1f6ca467c, and the snapshotter returned its
+//   balance as zero.
+//
+// The problem is that the snapshotter maintains a destruct set, and adds an item
+// to it whenever something is created "onto" an existing item.
+// We need to either roll back the snapDestructs entry, or not place it into
+// snapDestructs in the first place.
+//
+func TestInitThenFailCreateContract(t *testing.T) {
+	var (
+		// Generate a canonical chain to act as the main dataset
+		engine = ethash.NewFaker()
+		db     = rawdb.NewMemoryDatabase()
+		// A sender who makes transactions, has some funds
+		key, _  = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291")
+		address = crypto.PubkeyToAddress(key.PublicKey)
+		funds   = big.NewInt(1000000000)
+		bb      = common.HexToAddress("0x000000000000000000000000000000000000bbbb")
+	)
+
+	// The bb-code needs to CREATE2 the aa contract. It consists of
+	// both initcode and deployment code
+	// initcode:
+	// 1. If blocknum < 2, error out (e.g. via an invalid opcode)
+	// 2. else, return a snippet of code
+	initCode := []byte{
+		byte(vm.PUSH1), 0x1, // y (1)
+		byte(vm.NUMBER), // x (number)
+		byte(vm.GT),     // x > y?
+		byte(vm.PUSH1), byte(0x8), // jump destination (the JUMPDEST below)
+		byte(vm.JUMPI), // jump to label if number > 1
+		byte(0xFE),     // illegal opcode
+		byte(vm.JUMPDEST),
+		byte(vm.PUSH1), 0x2, // size
+		byte(vm.PUSH1), 0x0, // offset
+		byte(vm.RETURN), // return 2 bytes of zero-code
+	}
+	if l := len(initCode); l > 32 {
+		t.Fatalf("init code is too long for a pushx, need a more elaborate deployer")
+	}
+	bbCode := []byte{
+		// Push initcode onto stack
+		byte(vm.PUSH1) + byte(len(initCode)-1)}
+	bbCode = append(bbCode, initCode...)
+	bbCode = append(bbCode, []byte{
+		byte(vm.PUSH1), 0x0, // memory start on stack
+		byte(vm.MSTORE),
+		byte(vm.PUSH1), 0x00, // salt
+		byte(vm.PUSH1), byte(len(initCode)), // size
+		byte(vm.PUSH1), byte(32 - len(initCode)), // offset
+		byte(vm.PUSH1), 0x00, // endowment
+		byte(vm.CREATE2),
+	}...)
+ + initHash := crypto.Keccak256Hash(initCode) + aa := crypto.CreateAddress2(bb, [32]byte{}, initHash[:]) + t.Logf("Destination address: %x\n", aa) + + gspec := &Genesis{ + Config: params.TestChainConfig, + Alloc: GenesisAlloc{ + address: {Balance: funds}, + // The address aa has some funds + aa: {Balance: big.NewInt(100000)}, + // The contract BB tries to create code onto AA + bb: { + Code: bbCode, + Balance: big.NewInt(1), + }, + }, + } + genesis := gspec.MustCommit(db) + nonce := uint64(0) + blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 4, func(i int, b *BlockGen) { + b.SetCoinbase(common.Address{1}) + // One transaction to BB + tx, _ := types.SignTx(types.NewTransaction(nonce, bb, + big.NewInt(0), 100000, big.NewInt(1), nil), types.HomesteadSigner{}, key) + b.AddTx(tx) + nonce++ + }) + + // Import the canonical chain + diskdb := rawdb.NewMemoryDatabase() + gspec.MustCommit(diskdb) + chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{ + //Debug: true, + //Tracer: vm.NewJSONLogger(nil, os.Stdout), + }, nil) + if err != nil { + t.Fatalf("failed to create tester chain: %v", err) + } + statedb, _ := chain.State() + if got, exp := statedb.GetBalance(aa), big.NewInt(100000); got.Cmp(exp) != 0 { + t.Fatalf("Genesis err, got %v exp %v", got, exp) + } + // First block tries to create, but fails + { + block := blocks[0] + if _, err := chain.InsertChain([]*types.Block{blocks[0]}); err != nil { + t.Fatalf("block %d: failed to insert into chain: %v", block.NumberU64(), err) + } + statedb, _ = chain.State() + if got, exp := statedb.GetBalance(aa), big.NewInt(100000); got.Cmp(exp) != 0 { + t.Fatalf("block %d: got %v exp %v", block.NumberU64(), got, exp) + } + } + // Import the rest of the blocks + for _, block := range blocks[1:] { + if _, err := chain.InsertChain([]*types.Block{block}); err != nil { + t.Fatalf("block %d: failed to insert into chain: %v", block.NumberU64(), err) + } + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index dc8faca32..2c20df200 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -59,7 +59,7 @@ var ( txLookupPrefix = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits SnapshotAccountPrefix = []byte("a") // SnapshotAccountPrefix + account hash -> account trie value - SnapshotStoragePrefix = []byte("s") // SnapshotStoragePrefix + account hash + storage hash -> storage trie value + SnapshotStoragePrefix = []byte("o") // SnapshotStoragePrefix + account hash + storage hash -> storage trie value preimagePrefix = []byte("secure-key-") // preimagePrefix + hash -> preimage configPrefix = []byte("ethereum-config-") // config prefix for the db diff --git a/core/state/journal.go b/core/state/journal.go index c0bd2b924..f242dac5a 100644 --- a/core/state/journal.go +++ b/core/state/journal.go @@ -90,7 +90,8 @@ type ( account *common.Address } resetObjectChange struct { - prev *stateObject + prev *stateObject + prevdestruct bool } suicideChange struct { account *common.Address @@ -142,6 +143,9 @@ func (ch createObjectChange) dirtied() *common.Address { func (ch resetObjectChange) revert(s *StateDB) { s.setStateObject(ch.prev) + if !ch.prevdestruct && s.snap != nil { + delete(s.snapDestructs, ch.prev.addrHash) + } } func (ch resetObjectChange) dirtied() *common.Address { diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go index 
f0b1eafa9..e062298fa 100644 --- a/core/state/snapshot/iterator.go +++ b/core/state/snapshot/iterator.go @@ -148,9 +148,10 @@ type diskAccountIterator struct { // AccountIterator creates an account iterator over a disk layer. func (dl *diskLayer) AccountIterator(seek common.Hash) AccountIterator { + // TODO: Fix seek position, or remove seek parameter return &diskAccountIterator{ layer: dl, - it: dl.diskdb.NewIteratorWithPrefix(append(rawdb.SnapshotAccountPrefix, seek[:]...)), + it: dl.diskdb.NewIteratorWithPrefix(rawdb.SnapshotAccountPrefix), } } @@ -160,11 +161,16 @@ func (it *diskAccountIterator) Next() bool { if it.it == nil { return false } - // Try to advance the iterator and release it if we reahed the end - if !it.it.Next() || !bytes.HasPrefix(it.it.Key(), rawdb.SnapshotAccountPrefix) { - it.it.Release() - it.it = nil - return false + // Try to advance the iterator and release it if we reached the end + for { + if !it.it.Next() || !bytes.HasPrefix(it.it.Key(), rawdb.SnapshotAccountPrefix) { + it.it.Release() + it.it = nil + return false + } + if len(it.it.Key()) == len(rawdb.SnapshotAccountPrefix)+common.HashLength { + break + } } return true } diff --git a/core/state/statedb.go b/core/state/statedb.go index 038005685..4f5c1703e 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -569,12 +569,19 @@ func (s *StateDB) GetOrNewStateObject(addr common.Address) *stateObject { func (s *StateDB) createObject(addr common.Address) (newobj, prev *stateObject) { prev = s.getDeletedStateObject(addr) // Note, prev might have been deleted, we need that! + var prevdestruct bool + if s.snap != nil && prev != nil { + _, prevdestruct = s.snapDestructs[prev.addrHash] + if !prevdestruct { + s.snapDestructs[prev.addrHash] = struct{}{} + } + } newobj = newObject(s, addr, Account{}) newobj.setNonce(0) // sets the object to dirty if prev == nil { s.journal.append(createObjectChange{account: &addr}) } else { - s.journal.append(resetObjectChange{prev: prev}) + s.journal.append(resetObjectChange{prev: prev, prevdestruct: prevdestruct}) } s.setStateObject(newobj) return newobj, prev @@ -595,9 +602,6 @@ func (s *StateDB) CreateAccount(addr common.Address) { if prev != nil { newObj.setBalance(prev.data.Balance) } - if s.snap != nil && prev != nil { - s.snapDestructs[prev.addrHash] = struct{}{} - } } func (db *StateDB) ForEachStorage(addr common.Address, cb func(key, value common.Hash) bool) error {
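
Why the snapshot storage prefix had to move off "s": the preimage table uses the "secure-key-" prefix, which also begins with the byte 's', so an iterator seeded with the one-byte storage prefix would walk preimage entries as well. Below is a minimal, hypothetical sketch (not part of the patch series) showing the collision and the key-length filter that the disk iterator now applies. The literal keys and values are illustrative only; the memorydb backend, NewIteratorWithPrefix and common.HashLength are the same helpers already used elsewhere in this series.

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb/memorydb"
)

func main() {
	db := memorydb.New()

	// Old-style snapshot storage entry: "s" + account hash + storage slot hash.
	oldStoragePrefix := []byte("s")
	storageKey := append(append([]byte("s"), common.HexToHash("0xaa").Bytes()...), common.HexToHash("0x01").Bytes()...)
	db.Put(storageKey, []byte{0x01})

	// Trie preimage entry: "secure-key-" + hash. It also starts with 's', so a
	// prefix iterator over "s" visits it as well -- the collision removed by
	// switching SnapshotStoragePrefix to "o".
	preimageKey := append([]byte("secure-key-"), common.HexToHash("0xbb").Bytes()...)
	db.Put(preimageKey, []byte{0x02})

	it := db.NewIteratorWithPrefix(oldStoragePrefix)
	defer it.Release()
	for it.Next() {
		// Second line of defense, mirroring the disk account iterator: only
		// keys of the exact expected length are treated as snapshot entries.
		if len(it.Key()) != len(oldStoragePrefix)+2*common.HashLength {
			fmt.Printf("colliding key skipped: %q\n", it.Key())
			continue
		}
		fmt.Printf("snapshot storage entry: %x\n", it.Key())
	}
}

Running the sketch prints the storage entry once and reports the preimage key as a skipped collision, which is exactly the failure mode the new "o" prefix and the length check guard against.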