diff --git a/trie/committer.go b/trie/committer.go index 9f978873a..af5f5f1a1 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -142,12 +142,10 @@ func (c *committer) store(path []byte, n node) node { // We have the hash already, estimate the RLP encoding-size of the node. // The size is used for mem tracking, does not need to be exact var ( - size = estimateSize(n) nhash = common.BytesToHash(hash) mnode = &memoryNode{ hash: nhash, - node: simplifyNode(n), - size: uint16(size), + node: nodeToBytes(n), } ) // Collect the dirty node to nodeset for return. @@ -166,31 +164,29 @@ func (c *committer) store(path []byte, n node) node { return hash } -// estimateSize estimates the size of an rlp-encoded node, without actually -// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie -// with 1000 leaves, the only errors above 1% are on small shortnodes, where this -// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35) -func estimateSize(n node) int { +// mptResolver the children resolver in merkle-patricia-tree. +type mptResolver struct{} + +// ForEach implements childResolver, decodes the provided node and +// traverses the children inside. +func (resolver mptResolver) forEach(node []byte, onChild func(common.Hash)) { + forGatherChildren(mustDecodeNodeUnsafe(nil, node), onChild) +} + +// forGatherChildren traverses the node hierarchy and invokes the callback +// for all the hashnode children. +func forGatherChildren(n node, onChild func(hash common.Hash)) { switch n := n.(type) { case *shortNode: - // A short node contains a compacted key, and a value. - return 3 + len(n.Key) + estimateSize(n.Val) + forGatherChildren(n.Val, onChild) case *fullNode: - // A full node contains up to 16 hashes (some nils), and a key - s := 3 for i := 0; i < 16; i++ { - if child := n.Children[i]; child != nil { - s += estimateSize(child) - } else { - s++ - } + forGatherChildren(n.Children[i], onChild) } - return s - case valueNode: - return 1 + len(n) case hashNode: - return 1 + len(n) + onChild(common.BytesToHash(n)) + case valueNode, nil: default: - panic(fmt.Sprintf("node type %T", n)) + panic(fmt.Sprintf("unknown node type: %T", n)) } } diff --git a/trie/database.go b/trie/database.go index 200ed3674..c1f7ddd75 100644 --- a/trie/database.go +++ b/trie/database.go @@ -18,8 +18,6 @@ package trie import ( "errors" - "fmt" - "io" "reflect" "runtime" "sync" @@ -59,6 +57,12 @@ var ( memcacheCommitSizeMeter = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil) ) +// childResolver defines the required method to decode the provided +// trie node and iterate the children on top. +type childResolver interface { + forEach(node []byte, onChild func(common.Hash)) +} + // Database is an intermediate write layer between the trie data structures and // the disk database. The aim is to accumulate trie writes in-memory and only // periodically flush a couple tries to disk, garbage collecting the remainder. @@ -68,7 +72,8 @@ var ( // behind this split design is to provide read access to RPC handlers and sync // servers even while the trie is executing expensive garbage collection. type Database struct { - diskdb ethdb.Database // Persistent storage for matured trie nodes + diskdb ethdb.Database // Persistent storage for matured trie nodes + resolver childResolver // The handler to resolve children of nodes cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes @@ -90,55 +95,14 @@ type Database struct { lock sync.RWMutex } -// rawNode is a simple binary blob used to differentiate between collapsed trie -// nodes and already encoded RLP binary blobs (while at the same time store them -// in the same cache fields). -type rawNode []byte - -func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } -func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") } - -func (n rawNode) EncodeRLP(w io.Writer) error { - _, err := w.Write(n) - return err -} - -// rawFullNode represents only the useful data content of a full node, with the -// caches and flags stripped out to minimize its data storage. This type honors -// the same RLP encoding as the original parent. -type rawFullNode [17]node - -func (n rawFullNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } -func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") } - -func (n rawFullNode) EncodeRLP(w io.Writer) error { - eb := rlp.NewEncoderBuffer(w) - n.encode(eb) - return eb.Flush() -} - -// rawShortNode represents only the useful data content of a short node, with the -// caches and flags stripped out to minimize its data storage. This type honors -// the same RLP encoding as the original parent. -type rawShortNode struct { - Key []byte - Val node -} - -func (n rawShortNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } -func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") } - // cachedNode is all the information we know about a single cached trie node // in the memory database write layer. type cachedNode struct { - node node // Cached collapsed trie node, or raw rlp data - size uint16 // Byte size of the useful cached data - - parents uint32 // Number of live nodes referencing this one - children map[common.Hash]uint16 // External children referenced by this node - - flushPrev common.Hash // Previous node in the flush-list - flushNext common.Hash // Next node in the flush-list + node []byte // Encoded node blob + parents uint32 // Number of live nodes referencing this one + external map[common.Hash]struct{} // The set of external children + flushPrev common.Hash // Previous node in the flush-list + flushNext common.Hash // Next node in the flush-list } // cachedNodeSize is the raw size of a cachedNode data structure without any @@ -146,121 +110,14 @@ type cachedNode struct { // than not counting them. var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size()) -// cachedNodeChildrenSize is the raw size of an initialized but empty external -// reference map. -const cachedNodeChildrenSize = 48 - -// rlp returns the raw rlp encoded blob of the cached trie node, either directly -// from the cache, or by regenerating it from the collapsed node. -func (n *cachedNode) rlp() []byte { - if node, ok := n.node.(rawNode); ok { - return node - } - return nodeToBytes(n.node) -} - -// obj returns the decoded and expanded trie node, either directly from the cache, -// or by regenerating it from the rlp encoded blob. -func (n *cachedNode) obj(hash common.Hash) node { - if node, ok := n.node.(rawNode); ok { - // The raw-blob format nodes are loaded either from the - // clean cache or the database, they are all in their own - // copy and safe to use unsafe decoder. - return mustDecodeNodeUnsafe(hash[:], node) - } - return expandNode(hash[:], n.node) -} - -// forChilds invokes the callback for all the tracked children of this node, +// forChildren invokes the callback for all the tracked children of this node, // both the implicit ones from inside the node as well as the explicit ones // from outside the node. -func (n *cachedNode) forChilds(onChild func(hash common.Hash)) { - for child := range n.children { +func (n *cachedNode) forChildren(resolver childResolver, onChild func(hash common.Hash)) { + for child := range n.external { onChild(child) } - if _, ok := n.node.(rawNode); !ok { - forGatherChildren(n.node, onChild) - } -} - -// forGatherChildren traverses the node hierarchy of a collapsed storage node and -// invokes the callback for all the hashnode children. -func forGatherChildren(n node, onChild func(hash common.Hash)) { - switch n := n.(type) { - case *rawShortNode: - forGatherChildren(n.Val, onChild) - case rawFullNode: - for i := 0; i < 16; i++ { - forGatherChildren(n[i], onChild) - } - case hashNode: - onChild(common.BytesToHash(n)) - case valueNode, nil, rawNode: - default: - panic(fmt.Sprintf("unknown node type: %T", n)) - } -} - -// simplifyNode traverses the hierarchy of an expanded memory node and discards -// all the internal caches, returning a node that only contains the raw data. -func simplifyNode(n node) node { - switch n := n.(type) { - case *shortNode: - // Short nodes discard the flags and cascade - return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)} - - case *fullNode: - // Full nodes discard the flags and cascade - node := rawFullNode(n.Children) - for i := 0; i < len(node); i++ { - if node[i] != nil { - node[i] = simplifyNode(node[i]) - } - } - return node - - case valueNode, hashNode, rawNode: - return n - - default: - panic(fmt.Sprintf("unknown node type: %T", n)) - } -} - -// expandNode traverses the node hierarchy of a collapsed storage node and converts -// all fields and keys into expanded memory form. -func expandNode(hash hashNode, n node) node { - switch n := n.(type) { - case *rawShortNode: - // Short nodes need key and child expansion - return &shortNode{ - Key: compactToHex(n.Key), - Val: expandNode(nil, n.Val), - flags: nodeFlag{ - hash: hash, - }, - } - - case rawFullNode: - // Full nodes need child expansion - node := &fullNode{ - flags: nodeFlag{ - hash: hash, - }, - } - for i := 0; i < len(node.Children); i++ { - if n[i] != nil { - node.Children[i] = expandNode(nil, n[i]) - } - } - return node - - case valueNode, hashNode: - return n - - default: - panic(fmt.Sprintf("unknown node type: %T", n)) - } + resolver.forEach(n.node, onChild) } // Config defines all necessary options for database. @@ -293,34 +150,31 @@ func NewDatabaseWithConfig(diskdb ethdb.Database, config *Config) *Database { if config != nil && config.Preimages { preimage = newPreimageStore(diskdb) } - db := &Database{ - diskdb: diskdb, - cleans: cleans, - dirties: map[common.Hash]*cachedNode{{}: { - children: make(map[common.Hash]uint16), - }}, + return &Database{ + diskdb: diskdb, + resolver: mptResolver{}, + cleans: cleans, + dirties: make(map[common.Hash]*cachedNode), preimages: preimage, } - return db } // insert inserts a simplified trie node into the memory database. // All nodes inserted by this function will be reference tracked // and in theory should only used for **trie nodes** insertion. -func (db *Database) insert(hash common.Hash, size int, node node) { +func (db *Database) insert(hash common.Hash, node []byte) { // If the node's already cached, skip if _, ok := db.dirties[hash]; ok { return } - memcacheDirtyWriteMeter.Mark(int64(size)) + memcacheDirtyWriteMeter.Mark(int64(len(node))) // Create the cached entry for this node entry := &cachedNode{ node: node, - size: uint16(size), flushPrev: db.newest, } - entry.forChilds(func(child common.Hash) { + entry.forChildren(db.resolver, func(child common.Hash) { if c := db.dirties[child]; c != nil { c.parents++ } @@ -333,48 +187,7 @@ func (db *Database) insert(hash common.Hash, size int, node node) { } else { db.dirties[db.newest].flushNext, db.newest = hash, hash } - db.dirtiesSize += common.StorageSize(common.HashLength + entry.size) -} - -// node retrieves a cached trie node from memory, or returns nil if none can be -// found in the memory cache. -func (db *Database) node(hash common.Hash) node { - // Retrieve the node from the clean cache if available - if db.cleans != nil { - if enc := db.cleans.Get(nil, hash[:]); enc != nil { - memcacheCleanHitMeter.Mark(1) - memcacheCleanReadMeter.Mark(int64(len(enc))) - - // The returned value from cache is in its own copy, - // safe to use mustDecodeNodeUnsafe for decoding. - return mustDecodeNodeUnsafe(hash[:], enc) - } - } - // Retrieve the node from the dirty cache if available - db.lock.RLock() - dirty := db.dirties[hash] - db.lock.RUnlock() - - if dirty != nil { - memcacheDirtyHitMeter.Mark(1) - memcacheDirtyReadMeter.Mark(int64(dirty.size)) - return dirty.obj(hash) - } - memcacheDirtyMissMeter.Mark(1) - - // Content unavailable in memory, attempt to retrieve from disk - enc, err := db.diskdb.Get(hash[:]) - if err != nil || enc == nil { - return nil - } - if db.cleans != nil { - db.cleans.Set(hash[:], enc) - memcacheCleanMissMeter.Mark(1) - memcacheCleanWriteMeter.Mark(int64(len(enc))) - } - // The returned value from database is in its own copy, - // safe to use mustDecodeNodeUnsafe for decoding. - return mustDecodeNodeUnsafe(hash[:], enc) + db.dirtiesSize += common.StorageSize(common.HashLength + len(node)) } // Node retrieves an encoded cached trie node from memory. If it cannot be found @@ -399,8 +212,8 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) { if dirty != nil { memcacheDirtyHitMeter.Mark(1) - memcacheDirtyReadMeter.Mark(int64(dirty.size)) - return dirty.rlp(), nil + memcacheDirtyReadMeter.Mark(int64(len(dirty.node))) + return dirty.node, nil } memcacheDirtyMissMeter.Mark(1) @@ -426,9 +239,7 @@ func (db *Database) Nodes() []common.Hash { var hashes = make([]common.Hash, 0, len(db.dirties)) for hash := range db.dirties { - if hash != (common.Hash{}) { // Special case for "root" references/nodes - hashes = append(hashes, hash) - } + hashes = append(hashes, hash) } return hashes } @@ -451,18 +262,22 @@ func (db *Database) reference(child common.Hash, parent common.Hash) { if !ok { return } - // If the reference already exists, only duplicate for roots - if db.dirties[parent].children == nil { - db.dirties[parent].children = make(map[common.Hash]uint16) - db.childrenSize += cachedNodeChildrenSize - } else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) { + // The reference is for state root, increase the reference counter. + if parent == (common.Hash{}) { + node.parents += 1 + return + } + // The reference is for external storage trie, don't duplicate if + // the reference is already existent. + if db.dirties[parent].external == nil { + db.dirties[parent].external = make(map[common.Hash]struct{}) + } + if _, ok := db.dirties[parent].external[child]; ok { return } node.parents++ - db.dirties[parent].children[child]++ - if db.dirties[parent].children[child] == 1 { - db.childrenSize += common.HashLength + 2 // uint16 counter - } + db.dirties[parent].external[child] = struct{}{} + db.childrenSize += common.HashLength } // Dereference removes an existing reference from a root node. @@ -476,7 +291,7 @@ func (db *Database) Dereference(root common.Hash) { defer db.lock.Unlock() nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() - db.dereference(root, common.Hash{}) + db.dereference(root) db.gcnodes += uint64(nodes - len(db.dirties)) db.gcsize += storage - db.dirtiesSize @@ -491,23 +306,13 @@ func (db *Database) Dereference(root common.Hash) { } // dereference is the private locked version of Dereference. -func (db *Database) dereference(child common.Hash, parent common.Hash) { - // Dereference the parent-child - node := db.dirties[parent] - - if node.children != nil && node.children[child] > 0 { - node.children[child]-- - if node.children[child] == 0 { - delete(node.children, child) - db.childrenSize -= (common.HashLength + 2) // uint16 counter - } - } - // If the child does not exist, it's a previously committed node. - node, ok := db.dirties[child] +func (db *Database) dereference(hash common.Hash) { + // If the node does not exist, it's a previously committed node. + node, ok := db.dirties[hash] if !ok { return } - // If there are no more references to the child, delete it and cascade + // If there are no more references to the node, delete it and cascade if node.parents > 0 { // This is a special cornercase where a node loaded from disk (i.e. not in the // memcache any more) gets reinjected as a new node (short node split into full, @@ -517,25 +322,29 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) { } if node.parents == 0 { // Remove the node from the flush-list - switch child { + switch hash { case db.oldest: db.oldest = node.flushNext - db.dirties[node.flushNext].flushPrev = common.Hash{} + if node.flushNext != (common.Hash{}) { + db.dirties[node.flushNext].flushPrev = common.Hash{} + } case db.newest: db.newest = node.flushPrev - db.dirties[node.flushPrev].flushNext = common.Hash{} + if node.flushPrev != (common.Hash{}) { + db.dirties[node.flushPrev].flushNext = common.Hash{} + } default: db.dirties[node.flushPrev].flushNext = node.flushNext db.dirties[node.flushNext].flushPrev = node.flushPrev } // Dereference all children and delete the node - node.forChilds(func(hash common.Hash) { - db.dereference(hash, child) + node.forChildren(db.resolver, func(child common.Hash) { + db.dereference(child) }) - delete(db.dirties, child) - db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - if node.children != nil { - db.childrenSize -= cachedNodeChildrenSize + delete(db.dirties, hash) + db.dirtiesSize -= common.StorageSize(common.HashLength + len(node.node)) + if node.external != nil { + db.childrenSize -= common.StorageSize(len(node.external) * common.HashLength) } } } @@ -556,8 +365,8 @@ func (db *Database) Cap(limit common.StorageSize) error { // db.dirtiesSize only contains the useful data in the cache, but when reporting // the total memory consumption, the maintenance metadata is also needed to be // counted. - size := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize) - size += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2)) + size := db.dirtiesSize + common.StorageSize(len(db.dirties)*cachedNodeSize) + size += db.childrenSize // If the preimage cache got large enough, push to disk. If it's still small // leave for later to deduplicate writes. @@ -571,7 +380,7 @@ func (db *Database) Cap(limit common.StorageSize) error { for size > limit && oldest != (common.Hash{}) { // Fetch the oldest referenced node and push into the batch node := db.dirties[oldest] - rawdb.WriteLegacyTrieNode(batch, oldest, node.rlp()) + rawdb.WriteLegacyTrieNode(batch, oldest, node.node) // If we exceeded the ideal batch size, commit and reset if batch.ValueSize() >= ethdb.IdealBatchSize { @@ -584,9 +393,9 @@ func (db *Database) Cap(limit common.StorageSize) error { // Iterate to the next flush item, or abort if the size cap was achieved. Size // is the total size, including the useful cached data (hash -> blob), the // cache item metadata, as well as external children mappings. - size -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize) - if node.children != nil { - size -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) + size -= common.StorageSize(common.HashLength + len(node.node) + cachedNodeSize) + if node.external != nil { + size -= common.StorageSize(len(node.external) * common.HashLength) } oldest = node.flushNext } @@ -604,9 +413,9 @@ func (db *Database) Cap(limit common.StorageSize) error { delete(db.dirties, db.oldest) db.oldest = node.flushNext - db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - if node.children != nil { - db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) + db.dirtiesSize -= common.StorageSize(common.HashLength + len(node.node)) + if node.external != nil { + db.childrenSize -= common.StorageSize(len(node.external) * common.HashLength) } } if db.oldest != (common.Hash{}) { @@ -694,7 +503,9 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane return nil } var err error - node.forChilds(func(child common.Hash) { + + // Dereference all children and delete the node + node.forChildren(db.resolver, func(child common.Hash) { if err == nil { err = db.commit(child, batch, uncacher) } @@ -703,7 +514,7 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane return err } // If we've reached an optimal batch size, commit and start over - rawdb.WriteLegacyTrieNode(batch, hash, node.rlp()) + rawdb.WriteLegacyTrieNode(batch, hash, node.node) if batch.ValueSize() >= ethdb.IdealBatchSize { if err := batch.Write(); err != nil { return err @@ -742,19 +553,23 @@ func (c *cleaner) Put(key []byte, rlp []byte) error { switch hash { case c.db.oldest: c.db.oldest = node.flushNext - c.db.dirties[node.flushNext].flushPrev = common.Hash{} + if node.flushNext != (common.Hash{}) { + c.db.dirties[node.flushNext].flushPrev = common.Hash{} + } case c.db.newest: c.db.newest = node.flushPrev - c.db.dirties[node.flushPrev].flushNext = common.Hash{} + if node.flushPrev != (common.Hash{}) { + c.db.dirties[node.flushPrev].flushNext = common.Hash{} + } default: c.db.dirties[node.flushPrev].flushNext = node.flushNext c.db.dirties[node.flushNext].flushPrev = node.flushPrev } // Remove the node from the dirty cache delete(c.db.dirties, hash) - c.db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) - if node.children != nil { - c.db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2)) + c.db.dirtiesSize -= common.StorageSize(common.HashLength + len(node.node)) + if node.external != nil { + c.db.childrenSize -= common.StorageSize(len(node.external) * common.HashLength) } // Move the flushed node into the clean cache to prevent insta-reloads if c.db.cleans != nil { @@ -796,7 +611,7 @@ func (db *Database) Update(nodes *MergedNodeSet) error { if n.isDeleted() { return // ignore deletion } - db.insert(n.hash, int(n.size), n.node) + db.insert(n.hash, n.node) }) } // Link up the account trie and storage trie if the node points @@ -824,13 +639,12 @@ func (db *Database) Size() (common.StorageSize, common.StorageSize) { // db.dirtiesSize only contains the useful data in the cache, but when reporting // the total memory consumption, the maintenance metadata is also needed to be // counted. - var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize) - var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2)) + var metadataSize = common.StorageSize(len(db.dirties) * cachedNodeSize) var preimageSize common.StorageSize if db.preimages != nil { preimageSize = db.preimages.size() } - return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, preimageSize + return db.dirtiesSize + db.childrenSize + metadataSize, preimageSize } // GetReader retrieves a node reader belonging to the given state root. @@ -848,15 +662,9 @@ func newHashReader(db *Database) *hashReader { return &hashReader{db: db} } -// Node retrieves the trie node with the given node hash. +// Node retrieves the RLP-encoded trie node blob with the given node hash. // No error will be returned if the node is not found. -func (reader *hashReader) Node(_ common.Hash, _ []byte, hash common.Hash) (node, error) { - return reader.db.node(hash), nil -} - -// NodeBlob retrieves the RLP-encoded trie node blob with the given node hash. -// No error will be returned if the node is not found. -func (reader *hashReader) NodeBlob(_ common.Hash, _ []byte, hash common.Hash) ([]byte, error) { +func (reader *hashReader) Node(_ common.Hash, _ []byte, hash common.Hash) ([]byte, error) { blob, _ := reader.db.Node(hash) return blob, nil } diff --git a/trie/iterator.go b/trie/iterator.go index f42beec4a..6f054a724 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -387,7 +387,14 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { // loaded blob will be tracked, while it's not required here since // all loaded nodes won't be linked to trie at all and track nodes // may lead to out-of-memory issue. - return it.trie.reader.node(path, common.BytesToHash(hash)) + blob, err := it.trie.reader.node(path, common.BytesToHash(hash)) + if err != nil { + return nil, err + } + // The raw-blob format nodes are loaded either from the + // clean cache or the database, they are all in their own + // copy and safe to use unsafe decoder. + return mustDecodeNodeUnsafe(hash, blob), nil } func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) { @@ -401,7 +408,7 @@ func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) // loaded blob will be tracked, while it's not required here since // all loaded nodes won't be linked to trie at all and track nodes // may lead to out-of-memory issue. - return it.trie.reader.nodeBlob(path, common.BytesToHash(hash)) + return it.trie.reader.node(path, common.BytesToHash(hash)) } func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { diff --git a/trie/node.go b/trie/node.go index 6ce6551de..15bbf62f1 100644 --- a/trie/node.go +++ b/trie/node.go @@ -99,6 +99,19 @@ func (n valueNode) fstring(ind string) string { return fmt.Sprintf("%x ", []byte(n)) } +// rawNode is a simple binary blob used to differentiate between collapsed trie +// nodes and already encoded RLP binary blobs (while at the same time store them +// in the same cache fields). +type rawNode []byte + +func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } +func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") } + +func (n rawNode) EncodeRLP(w io.Writer) error { + _, err := w.Write(n) + return err +} + // mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered. func mustDecodeNode(hash, buf []byte) node { n, err := decodeNode(hash, buf) diff --git a/trie/node_enc.go b/trie/node_enc.go index cade35b70..1b2eca682 100644 --- a/trie/node_enc.go +++ b/trie/node_enc.go @@ -59,29 +59,6 @@ func (n valueNode) encode(w rlp.EncoderBuffer) { w.WriteBytes(n) } -func (n rawFullNode) encode(w rlp.EncoderBuffer) { - offset := w.List() - for _, c := range n { - if c != nil { - c.encode(w) - } else { - w.Write(rlp.EmptyString) - } - } - w.ListEnd(offset) -} - -func (n *rawShortNode) encode(w rlp.EncoderBuffer) { - offset := w.List() - w.WriteBytes(n.Key) - if n.Val != nil { - n.Val.encode(w) - } else { - w.Write(rlp.EmptyString) - } - w.ListEnd(offset) -} - func (n rawNode) encode(w rlp.EncoderBuffer) { w.Write(n) } diff --git a/trie/nodeset.go b/trie/nodeset.go index 99e4a80fa..fc2111375 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -18,7 +18,6 @@ package trie import ( "fmt" - "reflect" "sort" "strings" @@ -28,41 +27,28 @@ import ( // memoryNode is all the information we know about a single cached trie node // in the memory. type memoryNode struct { - hash common.Hash // Node hash, computed by hashing rlp value, empty for deleted nodes - size uint16 // Byte size of the useful cached data, 0 for deleted nodes - node node // Cached collapsed trie node, or raw rlp data, nil for deleted nodes + hash common.Hash // Node hash by hashing node blob, empty for deleted nodes + node []byte // Encoded node blob, nil for deleted nodes } -// memoryNodeSize is the raw size of a memoryNode data structure without any -// node data included. It's an approximate size, but should be a lot better -// than not counting them. -// nolint:unused -var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) - // memorySize returns the total memory size used by this node. // nolint:unused func (n *memoryNode) memorySize(pathlen int) int { - return int(n.size) + memoryNodeSize + pathlen + return len(n.node) + common.HashLength + pathlen } // rlp returns the raw rlp encoded blob of the cached trie node, either directly // from the cache, or by regenerating it from the collapsed node. // nolint:unused func (n *memoryNode) rlp() []byte { - if node, ok := n.node.(rawNode); ok { - return node - } - return nodeToBytes(n.node) + return n.node } // obj returns the decoded and expanded trie node, either directly from the cache, // or by regenerating it from the rlp encoded blob. // nolint:unused func (n *memoryNode) obj() node { - if node, ok := n.node.(rawNode); ok { - return mustDecodeNode(n.hash[:], node) - } - return expandNode(n.hash[:], n.node) + return mustDecodeNode(n.hash[:], n.node) } // isDeleted returns the indicator if the node is marked as deleted. diff --git a/trie/proof.go b/trie/proof.go index 65df7577b..a25263d82 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -64,12 +64,15 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e // loaded blob will be tracked, while it's not required here since // all loaded nodes won't be linked to trie at all and track nodes // may lead to out-of-memory issue. - var err error - tn, err = t.reader.node(prefix, common.BytesToHash(n)) + blob, err := t.reader.node(prefix, common.BytesToHash(n)) if err != nil { log.Error("Unhandled trie error in Trie.Prove", "err", err) return err } + // The raw-blob format nodes are loaded either from the + // clean cache or the database, they are all in their own + // copy and safe to use unsafe decoder. + tn = mustDecodeNodeUnsafe(n, blob) default: panic(fmt.Sprintf("%T: invalid node: %v", tn, tn)) } diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 030d2a596..ee1ce2829 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -420,17 +420,17 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) { return case branchNode: - var nodes rawFullNode + var nodes fullNode for i, child := range st.children { if child == nil { - nodes[i] = nilValueNode + nodes.Children[i] = nilValueNode continue } child.hashRec(hasher, append(path, byte(i))) if len(child.val) < 32 { - nodes[i] = rawNode(child.val) + nodes.Children[i] = rawNode(child.val) } else { - nodes[i] = hashNode(child.val) + nodes.Children[i] = hashNode(child.val) } // Release child back to pool. @@ -444,7 +444,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) { case extNode: st.children[0].hashRec(hasher, append(path, st.key...)) - n := rawShortNode{Key: hexToCompact(st.key)} + n := shortNode{Key: hexToCompact(st.key)} if len(st.children[0].val) < 32 { n.Val = rawNode(st.children[0].val) } else { @@ -460,7 +460,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) { case leafNode: st.key = append(st.key, byte(16)) - n := rawShortNode{Key: hexToCompact(st.key), Val: valueNode(st.val)} + n := shortNode{Key: hexToCompact(st.key), Val: valueNode(st.val)} n.encode(hasher.encbuf) encodedNode = hasher.encodedBytes() diff --git a/trie/trie.go b/trie/trie.go index 4b6f6a55b..18504dc5b 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -212,7 +212,7 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod if hash == nil { return nil, origNode, 0, errors.New("non-consensus node") } - blob, err := t.reader.nodeBlob(path, common.BytesToHash(hash)) + blob, err := t.reader.node(path, common.BytesToHash(hash)) return blob, origNode, 1, err } // Path still needs to be traversed, descend into children @@ -549,7 +549,7 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) { // node's original value. The rlp-encoded blob is preferred to be loaded from // database because it's easy to decode node while complex to encode node to blob. func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) { - blob, err := t.reader.nodeBlob(prefix, common.BytesToHash(n)) + blob, err := t.reader.node(prefix, common.BytesToHash(n)) if err != nil { return nil, err } diff --git a/trie/trie_reader.go b/trie/trie_reader.go index 14186159b..e3f98a806 100644 --- a/trie/trie_reader.go +++ b/trie/trie_reader.go @@ -22,17 +22,12 @@ import ( "github.com/ethereum/go-ethereum/common" ) -// Reader wraps the Node and NodeBlob method of a backing trie store. +// Reader wraps the Node method of a backing trie store. type Reader interface { - // Node retrieves the trie node with the provided trie identifier, hexary - // node path and the corresponding node hash. - // No error will be returned if the node is not found. - Node(owner common.Hash, path []byte, hash common.Hash) (node, error) - - // NodeBlob retrieves the RLP-encoded trie node blob with the provided trie - // identifier, hexary node path and the corresponding node hash. - // No error will be returned if the node is not found. - NodeBlob(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) + // Node retrieves the RLP-encoded trie node blob with the provided trie + // identifier, node path and the corresponding node hash. No error will + // be returned if the node is not found. + Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) } // NodeReader wraps all the necessary functions for accessing trie node. @@ -65,30 +60,10 @@ func newEmptyReader() *trieReader { return &trieReader{} } -// node retrieves the trie node with the provided trie node information. -// An MissingNodeError will be returned in case the node is not found or -// any error is encountered. -func (r *trieReader) node(path []byte, hash common.Hash) (node, error) { - // Perform the logics in tests for preventing trie node access. - if r.banned != nil { - if _, ok := r.banned[string(path)]; ok { - return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path} - } - } - if r.reader == nil { - return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path} - } - node, err := r.reader.Node(r.owner, path, hash) - if err != nil || node == nil { - return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err} - } - return node, nil -} - // node retrieves the rlp-encoded trie node with the provided trie node // information. An MissingNodeError will be returned in case the node is // not found or any error is encountered. -func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) { +func (r *trieReader) node(path []byte, hash common.Hash) ([]byte, error) { // Perform the logics in tests for preventing trie node access. if r.banned != nil { if _, ok := r.banned[string(path)]; ok { @@ -98,7 +73,7 @@ func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) { if r.reader == nil { return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path} } - blob, err := r.reader.NodeBlob(r.owner, path, hash) + blob, err := r.reader.Node(r.owner, path, hash) if err != nil || len(blob) == 0 { return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err} }