From 5a9c96454e04673124eedf681ae3aa0a58ffc843 Mon Sep 17 00:00:00 2001
From: Martin Holst Swende <martin@swende.se>
Date: Mon, 3 Feb 2020 16:28:30 +0100
Subject: [PATCH] trie: separate hashes and committer, collapse on commit

* trie:  make db insert use size instead of full data

* core/state: minor optimization in state onleaf allocation

* trie: implement dedicated committer and hasher

* trie: use dedicated committer/hasher

* trie: linter nitpicks

* core/state, trie: avoid unnecessary storage trie load+commit

* trie: review feedback, mainly docs + minor changes

* trie: start deprecating old hasher

* trie: fix misspell+lint

* trie: deprecate hasher.go, make proof framework use new hasher

* trie: rename pure_committer/hasher to committer/hasher

* trie, core/state: fix review concerns

* trie: more review concerns

* trie: make commit collapse into hashnode, don't touch dirtyness

* trie: goimports fixes

* trie: remove panics
---
 core/state/state_object.go |  16 ++-
 core/state/statedb.go      |   7 +-
 trie/committer.go          | 279 +++++++++++++++++++++++++++++++++++++
 trie/database.go           |  10 +-
 trie/hasher.go             | 235 ++++++++++++++-----------------
 trie/iterator.go           |   6 +-
 trie/proof.go              |  24 ++--
 trie/secure_trie.go        |   2 +-
 trie/trie.go               |  44 +++++-
 9 files changed, 462 insertions(+), 161 deletions(-)
 create mode 100644 trie/committer.go

diff --git a/core/state/state_object.go b/core/state/state_object.go
index 8680de021..667d5ec02 100644
--- a/core/state/state_object.go
+++ b/core/state/state_object.go
@@ -272,10 +272,13 @@ func (s *stateObject) finalise() {
 }
 
 // updateTrie writes cached storage modifications into the object's storage trie.
+// It will return nil if the trie has not been loaded and no changes have been made
 func (s *stateObject) updateTrie(db Database) Trie {
 	// Make sure all dirty slots are finalized into the pending storage area
 	s.finalise()
-
+	if len(s.pendingStorage) == 0 {
+		return s.trie
+	}
 	// Track the amount of time wasted on updating the storge trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now())
@@ -305,8 +308,10 @@ func (s *stateObject) updateTrie(db Database) Trie {
 
 // UpdateRoot sets the trie root to the current root hash of
 func (s *stateObject) updateRoot(db Database) {
-	s.updateTrie(db)
-
+	// If nothing changed, don't bother with hashing anything
+	if s.updateTrie(db) == nil {
+		return
+	}
 	// Track the amount of time wasted on hashing the storge trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now())
@@ -317,7 +322,10 @@ func (s *stateObject) updateRoot(db Database) {
 // CommitTrie the storage trie of the object to db.
 // This updates the trie root.
 func (s *stateObject) CommitTrie(db Database) error {
-	s.updateTrie(db)
+	// If nothing changed, don't bother with hashing anything
+	if s.updateTrie(db) == nil {
+		return nil
+	}
 	if s.dbErr != nil {
 		return s.dbErr
 	}
diff --git a/core/state/statedb.go b/core/state/statedb.go
index 085f2379f..5d40f59c6 100644
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -330,7 +330,8 @@ func (s *StateDB) StorageTrie(addr common.Address) Trie {
 		return nil
 	}
 	cpy := stateObject.deepCopy(s)
-	return cpy.updateTrie(s.db)
+	cpy.updateTrie(s.db)
+	return cpy.getTrie(s.db)
 }
 
 func (s *StateDB) HasSuicided(addr common.Address) bool {
@@ -750,8 +751,10 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) {
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now())
 	}
+	// The onleaf func is called _serially_, so we can reuse the same account
+	// for unmarshalling every time.
+	var account Account
 	return s.trie.Commit(func(leaf []byte, parent common.Hash) error {
-		var account Account
 		if err := rlp.DecodeBytes(leaf, &account); err != nil {
 			return nil
 		}
diff --git a/trie/committer.go b/trie/committer.go
new file mode 100644
index 000000000..eacefdff1
--- /dev/null
+++ b/trie/committer.go
@@ -0,0 +1,279 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"errors"
+	"fmt"
+	"sync"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+	"golang.org/x/crypto/sha3"
+)
+
+// leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow
+// some paralellism but not incur too much memory overhead.
+const leafChanSize = 200
+
+// leaf represents a trie leaf value
+type leaf struct {
+	size   int         // size of the rlp data (estimate)
+	hash   common.Hash // hash of rlp data
+	node   node        // the node to commit
+	vnodes bool        // set to true if the node (possibly) contains a valueNode
+}
+
+// committer is a type used for the trie Commit operation. A committer has some
+// internal preallocated temp space, and also a callback that is invoked when
+// leaves are committed. The leafs are passed through the `leafCh`,  to allow
+// some level of paralellism.
+// By 'some level' of parallelism, it's still the case that all leaves will be
+// processed sequentially - onleaf will never be called in parallel or out of order.
+type committer struct {
+	tmp sliceBuffer
+	sha keccakState
+
+	onleaf LeafCallback
+	leafCh chan *leaf
+}
+
+// committers live in a global sync.Pool
+var committerPool = sync.Pool{
+	New: func() interface{} {
+		return &committer{
+			tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
+			sha: sha3.NewLegacyKeccak256().(keccakState),
+		}
+	},
+}
+
+// newCommitter creates a new committer or picks one from the pool.
+func newCommitter() *committer {
+	return committerPool.Get().(*committer)
+}
+
+func returnCommitterToPool(h *committer) {
+	h.onleaf = nil
+	h.leafCh = nil
+	committerPool.Put(h)
+}
+
+// commitNeeded returns 'false' if the given node is already in sync with db
+func (c *committer) commitNeeded(n node) bool {
+	hash, dirty := n.cache()
+	return hash == nil || dirty
+}
+
+// commit collapses a node down into a hash node and inserts it into the database
+func (c *committer) Commit(n node, db *Database) (hashNode, error) {
+	if db == nil {
+		return nil, errors.New("no db provided")
+	}
+	h, err := c.commit(n, db, true)
+	if err != nil {
+		return nil, err
+	}
+	return h.(hashNode), nil
+}
+
+// commit collapses a node down into a hash node and inserts it into the database
+func (c *committer) commit(n node, db *Database, force bool) (node, error) {
+	// if this path is clean, use available cached data
+	hash, dirty := n.cache()
+	if hash != nil && !dirty {
+		return hash, nil
+	}
+	// Commit children, then parent, and remove remove the dirty flag.
+	switch cn := n.(type) {
+	case *shortNode:
+		// Commit child
+		collapsed := cn.copy()
+		if _, ok := cn.Val.(valueNode); !ok {
+			if childV, err := c.commit(cn.Val, db, false); err != nil {
+				return nil, err
+			} else {
+				collapsed.Val = childV
+			}
+		}
+		// The key needs to be copied, since we're delivering it to database
+		collapsed.Key = hexToCompact(cn.Key)
+		hashedNode := c.store(collapsed, db, force, true)
+		if hn, ok := hashedNode.(hashNode); ok {
+			return hn, nil
+		} else {
+			return collapsed, nil
+		}
+	case *fullNode:
+		hashedKids, hasVnodes, err := c.commitChildren(cn, db, force)
+		if err != nil {
+			return nil, err
+		}
+		collapsed := cn.copy()
+		collapsed.Children = hashedKids
+
+		hashedNode := c.store(collapsed, db, force, hasVnodes)
+		if hn, ok := hashedNode.(hashNode); ok {
+			return hn, nil
+		} else {
+			return collapsed, nil
+		}
+	case valueNode:
+		return c.store(cn, db, force, false), nil
+	// hashnodes aren't stored
+	case hashNode:
+		return cn, nil
+	}
+	return hash, nil
+}
+
+// commitChildren commits the children of the given fullnode
+func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) {
+	var children [17]node
+	var hasValueNodeChildren = false
+	for i, child := range n.Children {
+		if child == nil {
+			continue
+		}
+		hnode, err := c.commit(child, db, false)
+		if err != nil {
+			return children, false, err
+		}
+		children[i] = hnode
+		if _, ok := hnode.(valueNode); ok {
+			hasValueNodeChildren = true
+		}
+	}
+	return children, hasValueNodeChildren, nil
+}
+
+// store hashes the node n and if we have a storage layer specified, it writes
+// the key/value pair to it and tracks any node->child references as well as any
+// node->external trie references.
+func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node {
+	// Larger nodes are replaced by their hash and stored in the database.
+	var (
+		hash, _ = n.cache()
+		size    int
+	)
+	if hash == nil {
+		if vn, ok := n.(valueNode); ok {
+			c.tmp.Reset()
+			if err := rlp.Encode(&c.tmp, vn); err != nil {
+				panic("encode error: " + err.Error())
+			}
+			size = len(c.tmp)
+			if size < 32 && !force {
+				return n // Nodes smaller than 32 bytes are stored inside their parent
+			}
+			hash = c.makeHashNode(c.tmp)
+		} else {
+			// This was not generated - must be a small node stored in the parent
+			// No need to do anything here
+			return n
+		}
+	} else {
+		// We have the hash already, estimate the RLP encoding-size of the node.
+		// The size is used for mem tracking, does not need to be exact
+		size = estimateSize(n)
+	}
+	// If we're using channel-based leaf-reporting, send to channel.
+	// The leaf channel will be active only when there an active leaf-callback
+	if c.leafCh != nil {
+		c.leafCh <- &leaf{
+			size:   size,
+			hash:   common.BytesToHash(hash),
+			node:   n,
+			vnodes: hasVnodeChildren,
+		}
+	} else if db != nil {
+		// No leaf-callback used, but there's still a database. Do serial
+		// insertion
+		db.lock.Lock()
+		db.insert(common.BytesToHash(hash), size, n)
+		db.lock.Unlock()
+	}
+	return hash
+}
+
+// commitLoop does the actual insert + leaf callback for nodes
+func (c *committer) commitLoop(db *Database) {
+	for item := range c.leafCh {
+		var (
+			hash      = item.hash
+			size      = item.size
+			n         = item.node
+			hasVnodes = item.vnodes
+		)
+		// We are pooling the trie nodes into an intermediate memory cache
+		db.lock.Lock()
+		db.insert(hash, size, n)
+		db.lock.Unlock()
+		if c.onleaf != nil && hasVnodes {
+			switch n := n.(type) {
+			case *shortNode:
+				if child, ok := n.Val.(valueNode); ok {
+					c.onleaf(child, hash)
+				}
+			case *fullNode:
+				for i := 0; i < 16; i++ {
+					if child, ok := n.Children[i].(valueNode); ok {
+						c.onleaf(child, hash)
+					}
+				}
+			}
+		}
+	}
+}
+
+func (c *committer) makeHashNode(data []byte) hashNode {
+	n := make(hashNode, c.sha.Size())
+	c.sha.Reset()
+	c.sha.Write(data)
+	c.sha.Read(n)
+	return n
+}
+
+// estimateSize estimates the size of an rlp-encoded node, without actually
+// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
+// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
+// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
+func estimateSize(n node) int {
+	switch n := n.(type) {
+	case *shortNode:
+		// A short node contains a compacted key, and a value.
+		return 3 + len(n.Key) + estimateSize(n.Val)
+	case *fullNode:
+		// A full node contains up to 16 hashes (some nils), and a key
+		s := 3
+		for i := 0; i < 16; i++ {
+			if child := n.Children[i]; child != nil {
+				s += estimateSize(child)
+			} else {
+				s += 1
+			}
+		}
+		return s
+	case valueNode:
+		return 1 + len(n)
+	case hashNode:
+		return 1 + len(n)
+	default:
+		panic(fmt.Sprintf("node type %T", n))
+
+	}
+}
diff --git a/trie/database.go b/trie/database.go
index dee9f7844..5b673938f 100644
--- a/trie/database.go
+++ b/trie/database.go
@@ -310,24 +310,24 @@ func (db *Database) InsertBlob(hash common.Hash, blob []byte) {
 	db.lock.Lock()
 	defer db.lock.Unlock()
 
-	db.insert(hash, blob, rawNode(blob))
+	db.insert(hash, len(blob), rawNode(blob))
 }
 
 // insert inserts a collapsed trie node into the memory database. This method is
 // a more generic version of InsertBlob, supporting both raw blob insertions as
-// well ex trie node insertions. The blob must always be specified to allow proper
+// well ex trie node insertions. The blob size must be specified to allow proper
 // size tracking.
-func (db *Database) insert(hash common.Hash, blob []byte, node node) {
+func (db *Database) insert(hash common.Hash, size int, node node) {
 	// If the node's already cached, skip
 	if _, ok := db.dirties[hash]; ok {
 		return
 	}
-	memcacheDirtyWriteMeter.Mark(int64(len(blob)))
+	memcacheDirtyWriteMeter.Mark(int64(size))
 
 	// Create the cached entry for this node
 	entry := &cachedNode{
 		node:      simplifyNode(node),
-		size:      uint16(len(blob)),
+		size:      uint16(size),
 		flushPrev: db.newest,
 	}
 	entry.forChilds(func(child common.Hash) {
diff --git a/trie/hasher.go b/trie/hasher.go
index 54f6a9de2..71a3aec3b 100644
--- a/trie/hasher.go
+++ b/trie/hasher.go
@@ -1,4 +1,4 @@
-// Copyright 2016 The go-ethereum Authors
+// Copyright 2019 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
@@ -20,17 +20,10 @@ import (
 	"hash"
 	"sync"
 
-	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/rlp"
 	"golang.org/x/crypto/sha3"
 )
 
-type hasher struct {
-	tmp    sliceBuffer
-	sha    keccakState
-	onleaf LeafCallback
-}
-
 // keccakState wraps sha3.state. In addition to the usual hash methods, it also supports
 // Read to get a variable amount of data from the hash state. Read is faster than Sum
 // because it doesn't copy the internal state, but also modifies the internal state.
@@ -50,7 +43,14 @@ func (b *sliceBuffer) Reset() {
 	*b = (*b)[:0]
 }
 
-// hashers live in a global db.
+// hasher is a type used for the trie Hash operation. A hasher has some
+// internal preallocated temp space
+type hasher struct {
+	sha keccakState
+	tmp sliceBuffer
+}
+
+// hasherPool holds pureHashers
 var hasherPool = sync.Pool{
 	New: func() interface{} {
 		return &hasher{
@@ -60,9 +60,8 @@ var hasherPool = sync.Pool{
 	},
 }
 
-func newHasher(onleaf LeafCallback) *hasher {
+func newHasher() *hasher {
 	h := hasherPool.Get().(*hasher)
-	h.onleaf = onleaf
 	return h
 }
 
@@ -72,144 +71,126 @@ func returnHasherToPool(h *hasher) {
 
 // hash collapses a node down into a hash node, also returning a copy of the
 // original node initialized with the computed hash to replace the original one.
-func (h *hasher) hash(n node, db *Database, force bool) (node, node, error) {
-	// If we're not storing the node, just hashing, use available cached data
-	if hash, dirty := n.cache(); hash != nil {
-		if db == nil {
-			return hash, n, nil
-		}
-		if !dirty {
-			switch n.(type) {
-			case *fullNode, *shortNode:
-				return hash, hash, nil
-			default:
-				return hash, n, nil
-			}
-		}
+func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
+	// We're not storing the node, just hashing, use available cached data
+	if hash, _ := n.cache(); hash != nil {
+		return hash, n
 	}
 	// Trie not processed yet or needs storage, walk the children
-	collapsed, cached, err := h.hashChildren(n, db)
-	if err != nil {
-		return hashNode{}, n, err
-	}
-	hashed, err := h.store(collapsed, db, force)
-	if err != nil {
-		return hashNode{}, n, err
-	}
-	// Cache the hash of the node for later reuse and remove
-	// the dirty flag in commit mode. It's fine to assign these values directly
-	// without copying the node first because hashChildren copies it.
-	cachedHash, _ := hashed.(hashNode)
-	switch cn := cached.(type) {
+	switch n := n.(type) {
 	case *shortNode:
-		cn.flags.hash = cachedHash
-		if db != nil {
-			cn.flags.dirty = false
+		collapsed, cached := h.hashShortNodeChildren(n)
+		hashed := h.shortnodeToHash(collapsed, force)
+		// We need to retain the possibly _not_ hashed node, in case it was too
+		// small to be hashed
+		if hn, ok := hashed.(hashNode); ok {
+			cached.flags.hash = hn
+		} else {
+			cached.flags.hash = nil
 		}
+		return hashed, cached
 	case *fullNode:
-		cn.flags.hash = cachedHash
-		if db != nil {
-			cn.flags.dirty = false
+		collapsed, cached := h.hashFullNodeChildren(n)
+		hashed = h.fullnodeToHash(collapsed, force)
+		if hn, ok := hashed.(hashNode); ok {
+			cached.flags.hash = hn
+		} else {
+			cached.flags.hash = nil
 		}
-	}
-	return hashed, cached, nil
-}
-
-// hashChildren replaces the children of a node with their hashes if the encoded
-// size of the child is larger than a hash, returning the collapsed node as well
-// as a replacement for the original node with the child hashes cached in.
-func (h *hasher) hashChildren(original node, db *Database) (node, node, error) {
-	var err error
-
-	switch n := original.(type) {
-	case *shortNode:
-		// Hash the short node's child, caching the newly hashed subtree
-		collapsed, cached := n.copy(), n.copy()
-		collapsed.Key = hexToCompact(n.Key)
-		cached.Key = common.CopyBytes(n.Key)
-
-		if _, ok := n.Val.(valueNode); !ok {
-			collapsed.Val, cached.Val, err = h.hash(n.Val, db, false)
-			if err != nil {
-				return original, original, err
-			}
-		}
-		return collapsed, cached, nil
-
-	case *fullNode:
-		// Hash the full node's children, caching the newly hashed subtrees
-		collapsed, cached := n.copy(), n.copy()
-
-		for i := 0; i < 16; i++ {
-			if n.Children[i] != nil {
-				collapsed.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false)
-				if err != nil {
-					return original, original, err
-				}
-			}
-		}
-		cached.Children[16] = n.Children[16]
-		return collapsed, cached, nil
-
+		return hashed, cached
 	default:
 		// Value and hash nodes don't have children so they're left as were
-		return n, original, nil
+		return n, n
 	}
 }
 
-// store hashes the node n and if we have a storage layer specified, it writes
-// the key/value pair to it and tracks any node->child references as well as any
-// node->external trie references.
-func (h *hasher) store(n node, db *Database, force bool) (node, error) {
-	// Don't store hashes or empty nodes.
-	if _, isHash := n.(hashNode); n == nil || isHash {
-		return n, nil
+// hashShortNodeChildren collapses the short node. The returned collapsed node
+// holds a live reference to the Key, and must not be modified.
+// The cached
+func (h *hasher) hashShortNodeChildren(n *shortNode) (collapsed, cached *shortNode) {
+	// Hash the short node's child, caching the newly hashed subtree
+	collapsed, cached = n.copy(), n.copy()
+	// Previously, we did copy this one. We don't seem to need to actually
+	// do that, since we don't overwrite/reuse keys
+	//cached.Key = common.CopyBytes(n.Key)
+	collapsed.Key = hexToCompact(n.Key)
+	// Unless the child is a valuenode or hashnode, hash it
+	switch n.Val.(type) {
+	case *fullNode, *shortNode:
+		collapsed.Val, cached.Val = h.hash(n.Val, false)
 	}
-	// Generate the RLP encoding of the node
+	return collapsed, cached
+}
+
+func (h *hasher) hashFullNodeChildren(n *fullNode) (collapsed *fullNode, cached *fullNode) {
+	// Hash the full node's children, caching the newly hashed subtrees
+	cached = n.copy()
+	collapsed = n.copy()
+	for i := 0; i < 16; i++ {
+		if child := n.Children[i]; child != nil {
+			collapsed.Children[i], cached.Children[i] = h.hash(child, false)
+		} else {
+			collapsed.Children[i] = nilValueNode
+		}
+	}
+	cached.Children[16] = n.Children[16]
+	return collapsed, cached
+}
+
+// shortnodeToHash creates a hashNode from a shortNode. The supplied shortnode
+// should have hex-type Key, which will be converted (without modification)
+// into compact form for RLP encoding.
+// If the rlp data is smaller than 32 bytes, `nil` is returned.
+func (h *hasher) shortnodeToHash(n *shortNode, force bool) node {
 	h.tmp.Reset()
 	if err := rlp.Encode(&h.tmp, n); err != nil {
 		panic("encode error: " + err.Error())
 	}
+
 	if len(h.tmp) < 32 && !force {
-		return n, nil // Nodes smaller than 32 bytes are stored inside their parent
+		return n // Nodes smaller than 32 bytes are stored inside their parent
 	}
-	// Larger nodes are replaced by their hash and stored in the database.
-	hash, _ := n.cache()
-	if hash == nil {
-		hash = h.makeHashNode(h.tmp)
-	}
-
-	if db != nil {
-		// We are pooling the trie nodes into an intermediate memory cache
-		hash := common.BytesToHash(hash)
-
-		db.lock.Lock()
-		db.insert(hash, h.tmp, n)
-		db.lock.Unlock()
-
-		// Track external references from account->storage trie
-		if h.onleaf != nil {
-			switch n := n.(type) {
-			case *shortNode:
-				if child, ok := n.Val.(valueNode); ok {
-					h.onleaf(child, hash)
-				}
-			case *fullNode:
-				for i := 0; i < 16; i++ {
-					if child, ok := n.Children[i].(valueNode); ok {
-						h.onleaf(child, hash)
-					}
-				}
-			}
-		}
-	}
-	return hash, nil
+	return h.hashData(h.tmp)
 }
 
-func (h *hasher) makeHashNode(data []byte) hashNode {
-	n := make(hashNode, h.sha.Size())
+// shortnodeToHash is used to creates a hashNode from a set of hashNodes, (which
+// may contain nil values)
+func (h *hasher) fullnodeToHash(n *fullNode, force bool) node {
+	h.tmp.Reset()
+	// Generate the RLP encoding of the node
+	if err := n.EncodeRLP(&h.tmp); err != nil {
+		panic("encode error: " + err.Error())
+	}
+
+	if len(h.tmp) < 32 && !force {
+		return n // Nodes smaller than 32 bytes are stored inside their parent
+	}
+	return h.hashData(h.tmp)
+}
+
+// hashData hashes the provided data
+func (h *hasher) hashData(data []byte) hashNode {
+	n := make(hashNode, 32)
 	h.sha.Reset()
 	h.sha.Write(data)
 	h.sha.Read(n)
 	return n
 }
+
+// proofHash is used to construct trie proofs, and returns the 'collapsed'
+// node (for later RLP encoding) aswell as the hashed node -- unless the
+// node is smaller than 32 bytes, in which case it will be returned as is.
+// This method does not do anything on value- or hash-nodes.
+func (h *hasher) proofHash(original node) (collapsed, hashed node) {
+	switch n := original.(type) {
+	case *shortNode:
+		sn, _ := h.hashShortNodeChildren(n)
+		return sn, h.shortnodeToHash(sn, false)
+	case *fullNode:
+		fn, _ := h.hashFullNodeChildren(n)
+		return fn, h.fullnodeToHash(fn, false)
+	default:
+		// Value and hash nodes don't have children so they're left as were
+		return n, n
+	}
+}
diff --git a/trie/iterator.go b/trie/iterator.go
index 8e84dee3b..94b36a018 100644
--- a/trie/iterator.go
+++ b/trie/iterator.go
@@ -182,15 +182,13 @@ func (it *nodeIterator) LeafBlob() []byte {
 func (it *nodeIterator) LeafProof() [][]byte {
 	if len(it.stack) > 0 {
 		if _, ok := it.stack[len(it.stack)-1].node.(valueNode); ok {
-			hasher := newHasher(nil)
+			hasher := newHasher()
 			defer returnHasherToPool(hasher)
-
 			proofs := make([][]byte, 0, len(it.stack))
 
 			for i, item := range it.stack[:len(it.stack)-1] {
 				// Gather nodes that end up as hash nodes (or the root)
-				node, _, _ := hasher.hashChildren(item.node, nil)
-				hashed, _ := hasher.store(node, nil, false)
+				node, hashed := hasher.proofHash(item.node)
 				if _, ok := hashed.(hashNode); ok || i == 0 {
 					enc, _ := rlp.EncodeToBytes(node)
 					proofs = append(proofs, enc)
diff --git a/trie/proof.go b/trie/proof.go
index 9985e730d..f2c4658c4 100644
--- a/trie/proof.go
+++ b/trie/proof.go
@@ -64,26 +64,24 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e
 			panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
 		}
 	}
-	hasher := newHasher(nil)
+	hasher := newHasher()
 	defer returnHasherToPool(hasher)
 
 	for i, n := range nodes {
-		// Don't bother checking for errors here since hasher panics
-		// if encoding doesn't work and we're not writing to any database.
-		n, _, _ = hasher.hashChildren(n, nil)
-		hn, _ := hasher.store(n, nil, false)
+		if fromLevel > 0 {
+			fromLevel--
+			continue
+		}
+		var hn node
+		n, hn = hasher.proofHash(n)
 		if hash, ok := hn.(hashNode); ok || i == 0 {
 			// If the node's database encoding is a hash (or is the
 			// root node), it becomes a proof element.
-			if fromLevel > 0 {
-				fromLevel--
-			} else {
-				enc, _ := rlp.EncodeToBytes(n)
-				if !ok {
-					hash = hasher.makeHashNode(enc)
-				}
-				proofDb.Put(hash, enc)
+			enc, _ := rlp.EncodeToBytes(n)
+			if !ok {
+				hash = hasher.hashData(enc)
 			}
+			proofDb.Put(hash, enc)
 		}
 	}
 	return nil
diff --git a/trie/secure_trie.go b/trie/secure_trie.go
index fbc591ed1..b76a1dc8a 100644
--- a/trie/secure_trie.go
+++ b/trie/secure_trie.go
@@ -176,7 +176,7 @@ func (t *SecureTrie) NodeIterator(start []byte) NodeIterator {
 // The caller must not hold onto the return value because it will become
 // invalid on the next call to hashKey or secKey.
 func (t *SecureTrie) hashKey(key []byte) []byte {
-	h := newHasher(nil)
+	h := newHasher()
 	h.sha.Reset()
 	h.sha.Write(key)
 	buf := h.sha.Sum(t.hashKeyBuf[:0])
diff --git a/trie/trie.go b/trie/trie.go
index 920e331fd..dd26f9b34 100644
--- a/trie/trie.go
+++ b/trie/trie.go
@@ -20,6 +20,7 @@ package trie
 import (
 	"bytes"
 	"fmt"
+	"sync"
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/crypto"
@@ -415,19 +416,52 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
 	if t.db == nil {
 		panic("commit called on trie with nil database")
 	}
-	hash, cached, err := t.hashRoot(t.db, onleaf)
+	if t.root == nil {
+		return emptyRoot, nil
+	}
+	rootHash := t.Hash()
+	h := newCommitter()
+	defer returnCommitterToPool(h)
+	// Do a quick check if we really need to commit, before we spin
+	// up goroutines. This can happen e.g. if we load a trie for reading storage
+	// values, but don't write to it.
+	if !h.commitNeeded(t.root) {
+		return rootHash, nil
+	}
+	var wg sync.WaitGroup
+	if onleaf != nil {
+		h.onleaf = onleaf
+		h.leafCh = make(chan *leaf, leafChanSize)
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			h.commitLoop(t.db)
+		}()
+	}
+	var newRoot hashNode
+	newRoot, err = h.Commit(t.root, t.db)
+	if onleaf != nil {
+		// The leafch is created in newCommitter if there was an onleaf callback
+		// provided. The commitLoop only _reads_ from it, and the commit
+		// operation was the sole writer. Therefore, it's safe to close this
+		// channel here.
+		close(h.leafCh)
+		wg.Wait()
+	}
 	if err != nil {
 		return common.Hash{}, err
 	}
-	t.root = cached
-	return common.BytesToHash(hash.(hashNode)), nil
+	t.root = newRoot
+	return rootHash, nil
 }
 
+// hashRoot calculates the root hash of the given trie
 func (t *Trie) hashRoot(db *Database, onleaf LeafCallback) (node, node, error) {
 	if t.root == nil {
 		return hashNode(emptyRoot.Bytes()), nil, nil
 	}
-	h := newHasher(onleaf)
+	h := newHasher()
 	defer returnHasherToPool(h)
-	return h.hash(t.root, db, true)
+	hashed, cached := h.hash(t.root, true)
+	return hashed, cached, nil
 }