ipld-eth-statedb/trie_by_cid/trie/hasher.go
Roy Crihfield 761d60acdf Geth 1.13 (Deneb/Cancun) update (#5)
The Geth `core/state` and `trie` packages underwent a big refactor between `v1.11.6` and `1.13.14`.
This code, which was adapted from those, needed corresponding updates. To do this I applied the diff patches from Geth directly where possible and in some places had to clone new parts of the Geth code and adapt them.

In order to make this process as straightforward as possible in the future, I've attempted to minimize the number of changes vs. Geth and added some documentation in the `trie_by_cid` package.

Reviewed-on: #5
2024-05-29 10:00:12 +00:00

209 lines
6.3 KiB
Go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"sync"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/rlp"
"golang.org/x/crypto/sha3"
)
// hasher is a type used for the trie Hash operation. A hasher has some
// internal preallocated temp space
type hasher struct {
sha crypto.KeccakState
tmp []byte
encbuf rlp.EncoderBuffer
parallel bool // Whether to use parallel threads when hashing
}
// hasherPool holds pureHashers
var hasherPool = sync.Pool{
New: func() interface{} {
return &hasher{
tmp: make([]byte, 0, 550), // cap is as large as a full fullNode.
sha: sha3.NewLegacyKeccak256().(crypto.KeccakState),
encbuf: rlp.NewEncoderBuffer(nil),
}
},
}
func newHasher(parallel bool) *hasher {
h := hasherPool.Get().(*hasher)
h.parallel = parallel
return h
}
func returnHasherToPool(h *hasher) {
hasherPool.Put(h)
}
// hash collapses a node down into a hash node, also returning a copy of the
// original node initialized with the computed hash to replace the original one.
func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
// Return the cached hash if it's available
if hash, _ := n.cache(); hash != nil {
return hash, n
}
// Trie not processed yet, walk the children
switch n := n.(type) {
case *shortNode:
collapsed, cached := h.hashShortNodeChildren(n)
hashed := h.shortnodeToHash(collapsed, force)
// We need to retain the possibly _not_ hashed node, in case it was too
// small to be hashed
if hn, ok := hashed.(hashNode); ok {
cached.flags.hash = hn
} else {
cached.flags.hash = nil
}
return hashed, cached
case *fullNode:
collapsed, cached := h.hashFullNodeChildren(n)
hashed = h.fullnodeToHash(collapsed, force)
if hn, ok := hashed.(hashNode); ok {
cached.flags.hash = hn
} else {
cached.flags.hash = nil
}
return hashed, cached
default:
// Value and hash nodes don't have children, so they're left as were
return n, n
}
}
// hashShortNodeChildren collapses the short node. The returned collapsed node
// holds a live reference to the Key, and must not be modified.
func (h *hasher) hashShortNodeChildren(n *shortNode) (collapsed, cached *shortNode) {
// Hash the short node's child, caching the newly hashed subtree
collapsed, cached = n.copy(), n.copy()
// Previously, we did copy this one. We don't seem to need to actually
// do that, since we don't overwrite/reuse keys
// cached.Key = common.CopyBytes(n.Key)
collapsed.Key = hexToCompact(n.Key)
// Unless the child is a valuenode or hashnode, hash it
switch n.Val.(type) {
case *fullNode, *shortNode:
collapsed.Val, cached.Val = h.hash(n.Val, false)
}
return collapsed, cached
}
func (h *hasher) hashFullNodeChildren(n *fullNode) (collapsed *fullNode, cached *fullNode) {
// Hash the full node's children, caching the newly hashed subtrees
cached = n.copy()
collapsed = n.copy()
if h.parallel {
var wg sync.WaitGroup
wg.Add(16)
for i := 0; i < 16; i++ {
go func(i int) {
hasher := newHasher(false)
if child := n.Children[i]; child != nil {
collapsed.Children[i], cached.Children[i] = hasher.hash(child, false)
} else {
collapsed.Children[i] = nilValueNode
}
returnHasherToPool(hasher)
wg.Done()
}(i)
}
wg.Wait()
} else {
for i := 0; i < 16; i++ {
if child := n.Children[i]; child != nil {
collapsed.Children[i], cached.Children[i] = h.hash(child, false)
} else {
collapsed.Children[i] = nilValueNode
}
}
}
return collapsed, cached
}
// shortnodeToHash creates a hashNode from a shortNode. The supplied shortnode
// should have hex-type Key, which will be converted (without modification)
// into compact form for RLP encoding.
// If the rlp data is smaller than 32 bytes, `nil` is returned.
func (h *hasher) shortnodeToHash(n *shortNode, force bool) node {
n.encode(h.encbuf)
enc := h.encodedBytes()
if len(enc) < 32 && !force {
return n // Nodes smaller than 32 bytes are stored inside their parent
}
return h.hashData(enc)
}
// fullnodeToHash is used to create a hashNode from a fullNode, (which
// may contain nil values)
func (h *hasher) fullnodeToHash(n *fullNode, force bool) node {
n.encode(h.encbuf)
enc := h.encodedBytes()
if len(enc) < 32 && !force {
return n // Nodes smaller than 32 bytes are stored inside their parent
}
return h.hashData(enc)
}
// encodedBytes returns the result of the last encoding operation on h.encbuf.
// This also resets the encoder buffer.
//
// All node encoding must be done like this:
//
// node.encode(h.encbuf)
// enc := h.encodedBytes()
//
// This convention exists because node.encode can only be inlined/escape-analyzed when
// called on a concrete receiver type.
func (h *hasher) encodedBytes() []byte {
h.tmp = h.encbuf.AppendToBytes(h.tmp[:0])
h.encbuf.Reset(nil)
return h.tmp
}
// hashData hashes the provided data
func (h *hasher) hashData(data []byte) hashNode {
n := make(hashNode, 32)
h.sha.Reset()
h.sha.Write(data)
h.sha.Read(n)
return n
}
// proofHash is used to construct trie proofs, and returns the 'collapsed'
// node (for later RLP encoding) as well as the hashed node -- unless the
// node is smaller than 32 bytes, in which case it will be returned as is.
// This method does not do anything on value- or hash-nodes.
func (h *hasher) proofHash(original node) (collapsed, hashed node) {
switch n := original.(type) {
case *shortNode:
sn, _ := h.hashShortNodeChildren(n)
return sn, h.shortnodeToHash(sn, false)
case *fullNode:
fn, _ := h.hashFullNodeChildren(n)
return fn, h.fullnodeToHash(fn, false)
default:
// Value and hash nodes don't have children, so they're left as were
return n, n
}
}