forked from cerc-io/plugeth
65ed1a6871
This change speeds up trie hashing and all other activities that require RLP encoding of trie nodes by approximately 20%. The speedup is achieved by avoiding reflection overhead during node encoding. The interface type trie.node now contains a method 'encode' that works with rlp.EncoderBuffer. Management of EncoderBuffers is left to calling code. trie.hasher, which is pooled to avoid allocations, now maintains an EncoderBuffer. This means memory resources related to trie node encoding are tied to the hasher pool. Co-authored-by: Felix Lange <fjl@twurst.com>
275 lines
8.0 KiB
Go
275 lines
8.0 KiB
Go
// Copyright 2019 The go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package trie
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"sync"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/crypto"
|
|
"golang.org/x/crypto/sha3"
|
|
)
|
|
|
|
// leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow
|
|
// some parallelism but not incur too much memory overhead.
|
|
const leafChanSize = 200
|
|
|
|
// leaf represents a trie leaf value
|
|
type leaf struct {
|
|
size int // size of the rlp data (estimate)
|
|
hash common.Hash // hash of rlp data
|
|
node node // the node to commit
|
|
}
|
|
|
|
// committer is a type used for the trie Commit operation. A committer has some
|
|
// internal preallocated temp space, and also a callback that is invoked when
|
|
// leaves are committed. The leafs are passed through the `leafCh`, to allow
|
|
// some level of parallelism.
|
|
// By 'some level' of parallelism, it's still the case that all leaves will be
|
|
// processed sequentially - onleaf will never be called in parallel or out of order.
|
|
type committer struct {
|
|
sha crypto.KeccakState
|
|
|
|
onleaf LeafCallback
|
|
leafCh chan *leaf
|
|
}
|
|
|
|
// committers live in a global sync.Pool
|
|
var committerPool = sync.Pool{
|
|
New: func() interface{} {
|
|
return &committer{
|
|
sha: sha3.NewLegacyKeccak256().(crypto.KeccakState),
|
|
}
|
|
},
|
|
}
|
|
|
|
// newCommitter creates a new committer or picks one from the pool.
|
|
func newCommitter() *committer {
|
|
return committerPool.Get().(*committer)
|
|
}
|
|
|
|
func returnCommitterToPool(h *committer) {
|
|
h.onleaf = nil
|
|
h.leafCh = nil
|
|
committerPool.Put(h)
|
|
}
|
|
|
|
// Commit collapses a node down into a hash node and inserts it into the database
|
|
func (c *committer) Commit(n node, db *Database) (hashNode, int, error) {
|
|
if db == nil {
|
|
return nil, 0, errors.New("no db provided")
|
|
}
|
|
h, committed, err := c.commit(n, db)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
return h.(hashNode), committed, nil
|
|
}
|
|
|
|
// commit collapses a node down into a hash node and inserts it into the database
|
|
func (c *committer) commit(n node, db *Database) (node, int, error) {
|
|
// if this path is clean, use available cached data
|
|
hash, dirty := n.cache()
|
|
if hash != nil && !dirty {
|
|
return hash, 0, nil
|
|
}
|
|
// Commit children, then parent, and remove remove the dirty flag.
|
|
switch cn := n.(type) {
|
|
case *shortNode:
|
|
// Commit child
|
|
collapsed := cn.copy()
|
|
|
|
// If the child is fullNode, recursively commit,
|
|
// otherwise it can only be hashNode or valueNode.
|
|
var childCommitted int
|
|
if _, ok := cn.Val.(*fullNode); ok {
|
|
childV, committed, err := c.commit(cn.Val, db)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
collapsed.Val, childCommitted = childV, committed
|
|
}
|
|
// The key needs to be copied, since we're delivering it to database
|
|
collapsed.Key = hexToCompact(cn.Key)
|
|
hashedNode := c.store(collapsed, db)
|
|
if hn, ok := hashedNode.(hashNode); ok {
|
|
return hn, childCommitted + 1, nil
|
|
}
|
|
return collapsed, childCommitted, nil
|
|
case *fullNode:
|
|
hashedKids, childCommitted, err := c.commitChildren(cn, db)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
collapsed := cn.copy()
|
|
collapsed.Children = hashedKids
|
|
|
|
hashedNode := c.store(collapsed, db)
|
|
if hn, ok := hashedNode.(hashNode); ok {
|
|
return hn, childCommitted + 1, nil
|
|
}
|
|
return collapsed, childCommitted, nil
|
|
case hashNode:
|
|
return cn, 0, nil
|
|
default:
|
|
// nil, valuenode shouldn't be committed
|
|
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
|
|
}
|
|
}
|
|
|
|
// commitChildren commits the children of the given fullnode
|
|
func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, int, error) {
|
|
var (
|
|
committed int
|
|
children [17]node
|
|
)
|
|
for i := 0; i < 16; i++ {
|
|
child := n.Children[i]
|
|
if child == nil {
|
|
continue
|
|
}
|
|
// If it's the hashed child, save the hash value directly.
|
|
// Note: it's impossible that the child in range [0, 15]
|
|
// is a valueNode.
|
|
if hn, ok := child.(hashNode); ok {
|
|
children[i] = hn
|
|
continue
|
|
}
|
|
// Commit the child recursively and store the "hashed" value.
|
|
// Note the returned node can be some embedded nodes, so it's
|
|
// possible the type is not hashNode.
|
|
hashed, childCommitted, err := c.commit(child, db)
|
|
if err != nil {
|
|
return children, 0, err
|
|
}
|
|
children[i] = hashed
|
|
committed += childCommitted
|
|
}
|
|
// For the 17th child, it's possible the type is valuenode.
|
|
if n.Children[16] != nil {
|
|
children[16] = n.Children[16]
|
|
}
|
|
return children, committed, nil
|
|
}
|
|
|
|
// store hashes the node n and if we have a storage layer specified, it writes
|
|
// the key/value pair to it and tracks any node->child references as well as any
|
|
// node->external trie references.
|
|
func (c *committer) store(n node, db *Database) node {
|
|
// Larger nodes are replaced by their hash and stored in the database.
|
|
var (
|
|
hash, _ = n.cache()
|
|
size int
|
|
)
|
|
if hash == nil {
|
|
// This was not generated - must be a small node stored in the parent.
|
|
// In theory, we should apply the leafCall here if it's not nil(embedded
|
|
// node usually contains value). But small value(less than 32bytes) is
|
|
// not our target.
|
|
return n
|
|
} else {
|
|
// We have the hash already, estimate the RLP encoding-size of the node.
|
|
// The size is used for mem tracking, does not need to be exact
|
|
size = estimateSize(n)
|
|
}
|
|
// If we're using channel-based leaf-reporting, send to channel.
|
|
// The leaf channel will be active only when there an active leaf-callback
|
|
if c.leafCh != nil {
|
|
c.leafCh <- &leaf{
|
|
size: size,
|
|
hash: common.BytesToHash(hash),
|
|
node: n,
|
|
}
|
|
} else if db != nil {
|
|
// No leaf-callback used, but there's still a database. Do serial
|
|
// insertion
|
|
db.lock.Lock()
|
|
db.insert(common.BytesToHash(hash), size, n)
|
|
db.lock.Unlock()
|
|
}
|
|
return hash
|
|
}
|
|
|
|
// commitLoop does the actual insert + leaf callback for nodes.
|
|
func (c *committer) commitLoop(db *Database) {
|
|
for item := range c.leafCh {
|
|
var (
|
|
hash = item.hash
|
|
size = item.size
|
|
n = item.node
|
|
)
|
|
// We are pooling the trie nodes into an intermediate memory cache
|
|
db.lock.Lock()
|
|
db.insert(hash, size, n)
|
|
db.lock.Unlock()
|
|
|
|
if c.onleaf != nil {
|
|
switch n := n.(type) {
|
|
case *shortNode:
|
|
if child, ok := n.Val.(valueNode); ok {
|
|
c.onleaf(nil, nil, child, hash)
|
|
}
|
|
case *fullNode:
|
|
// For children in range [0, 15], it's impossible
|
|
// to contain valueNode. Only check the 17th child.
|
|
if n.Children[16] != nil {
|
|
c.onleaf(nil, nil, n.Children[16].(valueNode), hash)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *committer) makeHashNode(data []byte) hashNode {
|
|
n := make(hashNode, c.sha.Size())
|
|
c.sha.Reset()
|
|
c.sha.Write(data)
|
|
c.sha.Read(n)
|
|
return n
|
|
}
|
|
|
|
// estimateSize estimates the size of an rlp-encoded node, without actually
|
|
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
|
|
// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
|
|
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
|
|
func estimateSize(n node) int {
|
|
switch n := n.(type) {
|
|
case *shortNode:
|
|
// A short node contains a compacted key, and a value.
|
|
return 3 + len(n.Key) + estimateSize(n.Val)
|
|
case *fullNode:
|
|
// A full node contains up to 16 hashes (some nils), and a key
|
|
s := 3
|
|
for i := 0; i < 16; i++ {
|
|
if child := n.Children[i]; child != nil {
|
|
s += estimateSize(child)
|
|
} else {
|
|
s++
|
|
}
|
|
}
|
|
return s
|
|
case valueNode:
|
|
return 1 + len(n)
|
|
case hashNode:
|
|
return 1 + len(n)
|
|
default:
|
|
panic(fmt.Sprintf("node type %T", n))
|
|
}
|
|
}
|