From 13ef21d4672742e805316df9f319d51e749a129d Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Mon, 20 Feb 2023 22:54:52 +0800 Subject: [PATCH] Revert "core/trie: remove trie tracer (#26665)" (#26732) This reverts commit 7c749c947a9d5181f5f2c1b3fdb5ea6b0e401e8e. --- core/state/metrics.go | 14 +- core/state/statedb.go | 29 ++-- trie/committer.go | 29 +++- trie/database.go | 9 +- trie/nodeset.go | 120 ++++++++--------- trie/trie.go | 71 ++++++---- trie/trie_test.go | 66 ++++++++- trie/util_test.go | 304 ++++++++++++++++++++++++++++++++++++++++++ trie/utils.go | 199 +++++++++++++++++++++++++++ 9 files changed, 725 insertions(+), 116 deletions(-) create mode 100644 trie/util_test.go create mode 100644 trie/utils.go diff --git a/core/state/metrics.go b/core/state/metrics.go index 086666b7d..e702ef3a8 100644 --- a/core/state/metrics.go +++ b/core/state/metrics.go @@ -19,10 +19,12 @@ package state import "github.com/ethereum/go-ethereum/metrics" var ( - accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) - storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) - accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) - storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) - accountTrieNodesMeter = metrics.NewRegisteredMeter("state/trie/account", nil) - storageTriesNodesMeter = metrics.NewRegisteredMeter("state/trie/storage", nil) + accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) + storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) + accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) + storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) + accountTrieUpdatedMeter = metrics.NewRegisteredMeter("state/update/accountnodes", nil) + storageTriesUpdatedMeter = metrics.NewRegisteredMeter("state/update/storagenodes", nil) + accountTrieDeletedMeter = metrics.NewRegisteredMeter("state/delete/accountnodes", nil) + storageTriesDeletedMeter = metrics.NewRegisteredMeter("state/delete/storagenodes", nil) ) diff --git a/core/state/statedb.go b/core/state/statedb.go index 4385c1976..3f4bec239 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -970,11 +970,13 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { // Commit objects to the trie, measuring the elapsed time var ( - accountTrieNodes int - storageTrieNodes int - nodes = trie.NewMergedNodeSet() - codeWriter = s.db.DiskDB().NewBatch() + accountTrieNodesUpdated int + accountTrieNodesDeleted int + storageTrieNodesUpdated int + storageTrieNodesDeleted int + nodes = trie.NewMergedNodeSet() ) + codeWriter := s.db.DiskDB().NewBatch() for addr := range s.stateObjectsDirty { if obj := s.stateObjects[addr]; !obj.deleted { // Write any contract code associated with the state object @@ -992,9 +994,17 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := nodes.Merge(set); err != nil { return common.Hash{}, err } - storageTrieNodes += set.Size() + updates, deleted := set.Size() + storageTrieNodesUpdated += updates + storageTrieNodesDeleted += deleted } } + // If the contract is destructed, the storage is still left in the + // database as dangling data. Theoretically it's should be wiped from + // database as well, but in hash-based-scheme it's extremely hard to + // determine that if the trie nodes are also referenced by other storage, + // and in path-based-scheme some technical challenges are still unsolved. + // Although it won't affect the correctness but please fix it TODO(rjl493456442). } if len(s.stateObjectsDirty) > 0 { s.stateObjectsDirty = make(map[common.Address]struct{}) @@ -1015,7 +1025,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := nodes.Merge(set); err != nil { return common.Hash{}, err } - accountTrieNodes = set.Size() + accountTrieNodesUpdated, accountTrieNodesDeleted = set.Size() } if metrics.EnabledExpensive { s.AccountCommits += time.Since(start) @@ -1024,9 +1034,10 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { storageUpdatedMeter.Mark(int64(s.StorageUpdated)) accountDeletedMeter.Mark(int64(s.AccountDeleted)) storageDeletedMeter.Mark(int64(s.StorageDeleted)) - accountTrieNodesMeter.Mark(int64(accountTrieNodes)) - storageTriesNodesMeter.Mark(int64(storageTrieNodes)) - + accountTrieUpdatedMeter.Mark(int64(accountTrieNodesUpdated)) + accountTrieDeletedMeter.Mark(int64(accountTrieNodesDeleted)) + storageTriesUpdatedMeter.Mark(int64(storageTrieNodesUpdated)) + storageTriesDeletedMeter.Mark(int64(storageTrieNodesDeleted)) s.AccountUpdated, s.AccountDeleted = 0, 0 s.StorageUpdated, s.StorageDeleted = 0, 0 } diff --git a/trie/committer.go b/trie/committer.go index 745cb7683..c4957f349 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -33,22 +33,29 @@ type leaf struct { // insertion order. type committer struct { nodes *NodeSet + tracer *tracer collectLeaf bool } // newCommitter creates a new committer or picks one from the pool. -func newCommitter(nodes *NodeSet, collectLeaf bool) *committer { +func newCommitter(owner common.Hash, tracer *tracer, collectLeaf bool) *committer { return &committer{ - nodes: nodes, + nodes: NewNodeSet(owner), + tracer: tracer, collectLeaf: collectLeaf, } } // Commit collapses a node down into a hash node and returns it along with // the modified nodeset. -func (c *committer) Commit(n node) hashNode { +func (c *committer) Commit(n node) (hashNode, *NodeSet) { h := c.commit(nil, n) - return h.(hashNode) + // Some nodes can be deleted from trie which can't be captured + // by committer itself. Iterate all deleted nodes tracked by + // tracer and marked them as deleted only if they are present + // in database previously. + c.tracer.markDeletions(c.nodes) + return h.(hashNode), c.nodes } // commit collapses a node down into a hash node and returns it. @@ -78,6 +85,12 @@ func (c *committer) commit(path []byte, n node) node { if hn, ok := hashedNode.(hashNode); ok { return hn } + // The short node now is embedded in its parent. Mark the node as + // deleted if it's present in database previously. It's equivalent + // as deletion from database's perspective. + if prev := c.tracer.getPrev(path); len(prev) != 0 { + c.nodes.markDeleted(path, prev) + } return collapsed case *fullNode: hashedKids := c.commitChildren(path, cn) @@ -88,6 +101,12 @@ func (c *committer) commit(path []byte, n node) node { if hn, ok := hashedNode.(hashNode); ok { return hn } + // The full node now is embedded in its parent. Mark the node as + // deleted if it's present in database previously. It's equivalent + // as deletion from database's perspective. + if prev := c.tracer.getPrev(path); len(prev) != 0 { + c.nodes.markDeleted(path, prev) + } return collapsed case hashNode: return cn @@ -150,7 +169,7 @@ func (c *committer) store(path []byte, n node) node { } ) // Collect the dirty node to nodeset for return. - c.nodes.markUpdated(path, mnode) + c.nodes.markUpdated(path, mnode, c.tracer.getPrev(path)) // Collect the corresponding leaf node if it's required. We don't check // full node since it's impossible to store value in fullNode. The key diff --git a/trie/database.go b/trie/database.go index 5d47475e0..74247d59c 100644 --- a/trie/database.go +++ b/trie/database.go @@ -792,12 +792,13 @@ func (db *Database) Update(nodes *MergedNodeSet) error { } for _, owner := range order { subset := nodes.sets[owner] - subset.forEachWithOrder(false, func(path string, n *memoryNode) { - if n.isDeleted() { - return // ignore deletion + for _, path := range subset.updates.order { + n, ok := subset.updates.nodes[path] + if !ok { + return fmt.Errorf("missing node %x %v", owner, path) } db.insert(n.hash, int(n.size), n.node) - }) + } } // Link up the account trie and storage trie if the node points // to an account trie leaf. diff --git a/trie/nodeset.go b/trie/nodeset.go index 4251eccaf..928172350 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -19,7 +19,6 @@ package trie import ( "fmt" "reflect" - "sort" "strings" "github.com/ethereum/go-ethereum/common" @@ -41,8 +40,8 @@ var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) // memorySize returns the total memory size used by this node. // nolint:unused -func (n *memoryNode) memorySize(pathlen int) int { - return int(n.size) + memoryNodeSize + pathlen +func (n *memoryNode) memorySize(key int) int { + return int(n.size) + memoryNodeSize + key } // rlp returns the raw rlp encoded blob of the cached trie node, either directly @@ -65,20 +64,14 @@ func (n *memoryNode) obj() node { return expandNode(n.hash[:], n.node) } -// isDeleted returns the indicator if the node is marked as deleted. -func (n *memoryNode) isDeleted() bool { - return n.hash == (common.Hash{}) -} - // nodeWithPrev wraps the memoryNode with the previous node value. -// nolint: unused type nodeWithPrev struct { *memoryNode prev []byte // RLP-encoded previous value, nil means it's non-existent } // unwrap returns the internal memoryNode object. -// nolint: unused +// nolint:unused func (n *nodeWithPrev) unwrap() *memoryNode { return n.memoryNode } @@ -86,69 +79,64 @@ func (n *nodeWithPrev) unwrap() *memoryNode { // memorySize returns the total memory size used by this node. It overloads // the function in memoryNode by counting the size of previous value as well. // nolint: unused -func (n *nodeWithPrev) memorySize(pathlen int) int { - return n.memoryNode.memorySize(pathlen) + len(n.prev) +func (n *nodeWithPrev) memorySize(key int) int { + return n.memoryNode.memorySize(key) + len(n.prev) +} + +// nodesWithOrder represents a collection of dirty nodes which includes +// newly-inserted and updated nodes. The modification order of all nodes +// is represented by order list. +type nodesWithOrder struct { + order []string // the path list of dirty nodes, sort by insertion order + nodes map[string]*nodeWithPrev // the map of dirty nodes, keyed by node path } // NodeSet contains all dirty nodes collected during the commit operation. // Each node is keyed by path. It's not thread-safe to use. type NodeSet struct { - owner common.Hash // the identifier of the trie - nodes map[string]*memoryNode // the set of dirty nodes(inserted, updated, deleted) - leaves []*leaf // the list of dirty leaves - accessList map[string][]byte // The list of accessed nodes, which records the original node value + owner common.Hash // the identifier of the trie + updates *nodesWithOrder // the set of updated nodes(newly inserted, updated) + deletes map[string][]byte // the map of deleted nodes, keyed by node + leaves []*leaf // the list of dirty leaves } // NewNodeSet initializes an empty node set to be used for tracking dirty nodes -// for a specific account or storage trie. The owner is zero for the account trie -// and the owning account address hash for storage tries. The provided accessList -// represents the original value of accessed nodes, it can be optional but would -// be beneficial for speeding up the construction of trie history. -func NewNodeSet(owner common.Hash, accessList map[string][]byte) *NodeSet { - // Don't panic for lazy users - if accessList == nil { - accessList = make(map[string][]byte) - } +// from a specific account or storage trie. The owner is zero for the account +// trie and the owning account address hash for storage tries. +func NewNodeSet(owner common.Hash) *NodeSet { return &NodeSet{ - owner: owner, - nodes: make(map[string]*memoryNode), - accessList: accessList, + owner: owner, + updates: &nodesWithOrder{ + nodes: make(map[string]*nodeWithPrev), + }, + deletes: make(map[string][]byte), } } -// forEachWithOrder iterates the dirty nodes with the specified order. -// If topToBottom is true: -// -// then the order of iteration is top to bottom, left to right. -// -// If topToBottom is false: -// -// then the order of iteration is bottom to top, right to left. -func (set *NodeSet) forEachWithOrder(topToBottom bool, callback func(path string, n *memoryNode)) { - var paths sort.StringSlice - for path := range set.nodes { - paths = append(paths, path) - } - if topToBottom { - paths.Sort() - } else { - sort.Sort(sort.Reverse(paths)) - } - for _, path := range paths { - callback(path, set.nodes[path]) +/* +// NewNodeSetWithDeletion initializes the nodeset with provided deletion set. +func NewNodeSetWithDeletion(owner common.Hash, paths [][]byte, prev [][]byte) *NodeSet { + set := NewNodeSet(owner) + for i, path := range paths { + set.markDeleted(path, prev[i]) } + return set } +*/ // markUpdated marks the node as dirty(newly-inserted or updated) with provided // node path, node object along with its previous value. -func (set *NodeSet) markUpdated(path []byte, node *memoryNode) { - set.nodes[string(path)] = node +func (set *NodeSet) markUpdated(path []byte, node *memoryNode, prev []byte) { + set.updates.order = append(set.updates.order, string(path)) + set.updates.nodes[string(path)] = &nodeWithPrev{ + memoryNode: node, + prev: prev, + } } // markDeleted marks the node as deleted with provided path and previous value. -// nolint: unused -func (set *NodeSet) markDeleted(path []byte) { - set.nodes[string(path)] = &memoryNode{} +func (set *NodeSet) markDeleted(path []byte, prev []byte) { + set.deletes[string(path)] = prev } // addLeaf collects the provided leaf node into set. @@ -156,16 +144,16 @@ func (set *NodeSet) addLeaf(node *leaf) { set.leaves = append(set.leaves, node) } -// Size returns the number of dirty nodes contained in the set. -func (set *NodeSet) Size() int { - return len(set.nodes) +// Size returns the number of updated and deleted nodes contained in the set. +func (set *NodeSet) Size() (int, int) { + return len(set.updates.order), len(set.deletes) } // Hashes returns the hashes of all updated nodes. TODO(rjl493456442) how can // we get rid of it? func (set *NodeSet) Hashes() []common.Hash { var ret []common.Hash - for _, node := range set.nodes { + for _, node := range set.updates.nodes { ret = append(ret, node.hash) } return ret @@ -175,17 +163,19 @@ func (set *NodeSet) Hashes() []common.Hash { func (set *NodeSet) Summary() string { var out = new(strings.Builder) fmt.Fprintf(out, "nodeset owner: %v\n", set.owner) - if set.nodes != nil { - for path, n := range set.nodes { - // Deletion - if n.isDeleted() { - fmt.Fprintf(out, " [-]: %x\n", path) - continue + if set.updates != nil { + for _, key := range set.updates.order { + updated := set.updates.nodes[key] + if updated.prev != nil { + fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", key, updated.hash, updated.prev) + } else { + fmt.Fprintf(out, " [+]: %x -> %v\n", key, updated.hash) } - // Update - fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.hash) } } + for k, n := range set.deletes { + fmt.Fprintf(out, " [-]: %x -> %x\n", k, n) + } for _, n := range set.leaves { fmt.Fprintf(out, "[leaf]: %v\n", n) } diff --git a/trie/trie.go b/trie/trie.go index 65f89d91f..c467ac476 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -51,11 +51,12 @@ type Trie struct { // actually unhashed nodes. unhashed int - // accessList tracks the loaded nodes from database. - accessList map[string][]byte - // reader is the handler trie can retrieve nodes from. reader *trieReader + + // tracer is the tool to track the trie changes. + // It will be reset after each commit operation. + tracer *tracer } // newFlag returns the cache flag value for a newly created node. @@ -65,16 +66,12 @@ func (t *Trie) newFlag() nodeFlag { // Copy returns a copy of Trie. func (t *Trie) Copy() *Trie { - accessList := make(map[string][]byte) - for path, blob := range t.accessList { - accessList[path] = common.CopyBytes(blob) - } return &Trie{ - root: t.root, - owner: t.owner, - unhashed: t.unhashed, - reader: t.reader, - accessList: accessList, + root: t.root, + owner: t.owner, + unhashed: t.unhashed, + reader: t.reader, + tracer: t.tracer.copy(), } } @@ -90,9 +87,9 @@ func New(id *ID, db NodeReader) (*Trie, error) { return nil, err } trie := &Trie{ - owner: id.Owner, - reader: reader, - accessList: make(map[string][]byte), + owner: id.Owner, + reader: reader, + //tracer: newTracer(), } if id.Root != (common.Hash{}) && id.Root != emptyRoot { rootnode, err := trie.resolveAndTrack(id.Root[:], nil) @@ -329,6 +326,11 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error if matchlen == 0 { return true, branch, nil } + // New branch node is created as a child of the original short node. + // Track the newly inserted node in the tracer. The node identifier + // passed is the path from the root node. + t.tracer.onInsert(append(prefix, key[:matchlen]...)) + // Replace it with a short node leading up to the branch. return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil @@ -343,6 +345,11 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error return true, n, nil case nil: + // New short node is created and track it in the tracer. The node identifier + // passed is the path from the root node. Note the valueNode won't be tracked + // since it's always embedded in its parent. + t.tracer.onInsert(prefix) + return true, &shortNode{key, value, t.newFlag()}, nil case hashNode: @@ -395,6 +402,11 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return false, n, nil // don't replace n on mismatch } if matchlen == len(key) { + // The matched short node is deleted entirely and track + // it in the deletion set. The same the valueNode doesn't + // need to be tracked at all since it's always embedded. + t.tracer.onDelete(prefix) + return true, nil, nil // remove n entirely for whole matches } // The key is longer than n.Key. Remove the remaining suffix @@ -407,6 +419,10 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { } switch child := child.(type) { case *shortNode: + // The child shortNode is merged into its parent, track + // is deleted as well. + t.tracer.onDelete(append(prefix, n.Key...)) + // Deleting from the subtrie reduced it to another // short node. Merge the nodes to avoid creating a // shortNode{..., shortNode{...}}. Use concat (which @@ -468,6 +484,11 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return false, nil, err } if cnode, ok := cnode.(*shortNode); ok { + // Replace the entire full node with the short node. + // Mark the original short node as deleted since the + // value is embedded into the parent now. + t.tracer.onDelete(append(prefix, byte(pos))) + k := append([]byte{byte(pos)}, cnode.Key...) return true, &shortNode{k, cnode.Val, t.newFlag()}, nil } @@ -527,7 +548,7 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) { if err != nil { return nil, err } - t.accessList[string(prefix)] = blob + t.tracer.onRead(prefix, blob) return mustDecodeNode(n, blob), nil } @@ -546,15 +567,16 @@ func (t *Trie) Hash() common.Hash { // Once the trie is committed, it's not usable anymore. A new trie must // be created with new root and updated trie database for following usage func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { - // Reset accessList at the end of commit - defer func() { - t.accessList = make(map[string][]byte) - }() + defer t.tracer.reset() + // Trie is empty and can be classified into two types of situations: // - The trie was empty and no update happens // - The trie was non-empty and all nodes are dropped if t.root == nil { - return emptyRoot, nil + // Wrap tracked deletions as the return + set := NewNodeSet(t.owner) + t.tracer.markDeletions(set) + return emptyRoot, set } // Derive the hash for all dirty nodes first. We hold the assumption // in the following procedure that all nodes are hashed. @@ -568,9 +590,8 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { t.root = hashedNode return rootHash, nil } - nodes := NewNodeSet(t.owner, t.accessList) - h := newCommitter(nodes, collectLeaf) - newRoot := h.Commit(t.root) + h := newCommitter(t.owner, t.tracer, collectLeaf) + newRoot, nodes := h.Commit(t.root) t.root = newRoot return rootHash, nodes } @@ -593,5 +614,5 @@ func (t *Trie) Reset() { t.root = nil t.owner = common.Hash{} t.unhashed = 0 - t.accessList = make(map[string][]byte) + t.tracer.reset() } diff --git a/trie/trie_test.go b/trie/trie_test.go index b87b7d3cd..2fb97eebb 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -410,6 +410,7 @@ func runRandTest(rt randTest) bool { values = make(map[string]string) // tracks content of the trie origTrie = NewEmpty(triedb) ) + tr.tracer = newTracer() for i, step := range rt { // fmt.Printf("{op: %d, key: common.Hex2Bytes(\"%x\"), value: common.Hex2Bytes(\"%x\")}, // step %d\n", @@ -448,14 +449,21 @@ func runRandTest(rt randTest) bool { root, nodes := tr.Commit(true) // Validity the returned nodeset if nodes != nil { - for path := range nodes.nodes { + for path, node := range nodes.updates.nodes { blob, _, _ := origTrie.TryGetNode(hexToCompact([]byte(path))) - got := nodes.accessList[path] + got := node.prev if !bytes.Equal(blob, got) { rt[i].err = fmt.Errorf("prevalue mismatch for 0x%x, got 0x%x want 0x%x", path, got, blob) panic(rt[i].err) } } + for path, prev := range nodes.deletes { + blob, _, _ := origTrie.TryGetNode(hexToCompact([]byte(path))) + if !bytes.Equal(blob, prev) { + rt[i].err = fmt.Errorf("prevalue mismatch for 0x%x, got 0x%x want 0x%x", path, prev, blob) + return false + } + } } if nodes != nil { triedb.Update(NewWithNodeSet(nodes)) @@ -469,6 +477,7 @@ func runRandTest(rt randTest) bool { // Enable node tracing. Resolve the root node again explicitly // since it's not captured at the beginning. + tr.tracer = newTracer() tr.resolveAndTrack(root.Bytes(), nil) origTrie = tr.Copy() @@ -481,6 +490,59 @@ func runRandTest(rt randTest) bool { if tr.Hash() != checktr.Hash() { rt[i].err = fmt.Errorf("hash mismatch in opItercheckhash") } + case opNodeDiff: + var ( + inserted = tr.tracer.insertList() + deleted = tr.tracer.deleteList() + origIter = origTrie.NodeIterator(nil) + curIter = tr.NodeIterator(nil) + origSeen = make(map[string]struct{}) + curSeen = make(map[string]struct{}) + ) + for origIter.Next(true) { + if origIter.Leaf() { + continue + } + origSeen[string(origIter.Path())] = struct{}{} + } + for curIter.Next(true) { + if curIter.Leaf() { + continue + } + curSeen[string(curIter.Path())] = struct{}{} + } + var ( + insertExp = make(map[string]struct{}) + deleteExp = make(map[string]struct{}) + ) + for path := range curSeen { + _, present := origSeen[path] + if !present { + insertExp[path] = struct{}{} + } + } + for path := range origSeen { + _, present := curSeen[path] + if !present { + deleteExp[path] = struct{}{} + } + } + if len(insertExp) != len(inserted) { + rt[i].err = fmt.Errorf("insert set mismatch") + } + if len(deleteExp) != len(deleted) { + rt[i].err = fmt.Errorf("delete set mismatch") + } + for _, insert := range inserted { + if _, present := insertExp[string(insert)]; !present { + rt[i].err = fmt.Errorf("missing inserted node") + } + } + for _, del := range deleted { + if _, present := deleteExp[string(del)]; !present { + rt[i].err = fmt.Errorf("missing deleted node") + } + } } // Abort the test on error. if rt[i].err != nil { diff --git a/trie/util_test.go b/trie/util_test.go new file mode 100644 index 000000000..01a46553a --- /dev/null +++ b/trie/util_test.go @@ -0,0 +1,304 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" +) + +// Tests if the trie diffs are tracked correctly. +func TestTrieTracer(t *testing.T) { + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) + trie.tracer = newTracer() + + // Insert a batch of entries, all the nodes should be marked as inserted + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + trie.Hash() + + seen := make(map[string]struct{}) + it := trie.NodeIterator(nil) + for it.Next(true) { + if it.Leaf() { + continue + } + seen[string(it.Path())] = struct{}{} + } + inserted := trie.tracer.insertList() + if len(inserted) != len(seen) { + t.Fatalf("Unexpected inserted node tracked want %d got %d", len(seen), len(inserted)) + } + for _, k := range inserted { + _, ok := seen[string(k)] + if !ok { + t.Fatalf("Unexpected inserted node") + } + } + deleted := trie.tracer.deleteList() + if len(deleted) != 0 { + t.Fatalf("Unexpected deleted node tracked %d", len(deleted)) + } + + // Commit the changes and re-create with new root + root, nodes := trie.Commit(false) + if err := db.Update(NewWithNodeSet(nodes)); err != nil { + t.Fatal(err) + } + trie, _ = New(TrieID(root), db) + trie.tracer = newTracer() + + // Delete all the elements, check deletion set + for _, val := range vals { + trie.Delete([]byte(val.k)) + } + trie.Hash() + + inserted = trie.tracer.insertList() + if len(inserted) != 0 { + t.Fatalf("Unexpected inserted node tracked %d", len(inserted)) + } + deleted = trie.tracer.deleteList() + if len(deleted) != len(seen) { + t.Fatalf("Unexpected deleted node tracked want %d got %d", len(seen), len(deleted)) + } + for _, k := range deleted { + _, ok := seen[string(k)] + if !ok { + t.Fatalf("Unexpected inserted node") + } + } +} + +func TestTrieTracerNoop(t *testing.T) { + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) + trie.tracer = newTracer() + + // Insert a batch of entries, all the nodes should be marked as inserted + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + for _, val := range vals { + trie.Delete([]byte(val.k)) + } + if len(trie.tracer.insertList()) != 0 { + t.Fatalf("Unexpected inserted node tracked %d", len(trie.tracer.insertList())) + } + if len(trie.tracer.deleteList()) != 0 { + t.Fatalf("Unexpected deleted node tracked %d", len(trie.tracer.deleteList())) + } +} + +func TestTrieTracePrevValue(t *testing.T) { + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) + trie.tracer = newTracer() + + paths, blobs := trie.tracer.prevList() + if len(paths) != 0 || len(blobs) != 0 { + t.Fatalf("Nothing should be tracked") + } + // Insert a batch of entries, all the nodes should be marked as inserted + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + paths, blobs = trie.tracer.prevList() + if len(paths) != 0 || len(blobs) != 0 { + t.Fatalf("Nothing should be tracked") + } + + // Commit the changes and re-create with new root + root, nodes := trie.Commit(false) + if err := db.Update(NewWithNodeSet(nodes)); err != nil { + t.Fatal(err) + } + trie, _ = New(TrieID(root), db) + trie.tracer = newTracer() + trie.resolveAndTrack(root.Bytes(), nil) + + // Load all nodes in trie + for _, val := range vals { + trie.TryGet([]byte(val.k)) + } + + // Ensure all nodes are tracked by tracer with correct prev-values + iter := trie.NodeIterator(nil) + seen := make(map[string][]byte) + for iter.Next(true) { + // Embedded nodes are ignored since they are not present in + // database. + if iter.Hash() == (common.Hash{}) { + continue + } + seen[string(iter.Path())] = common.CopyBytes(iter.NodeBlob()) + } + + paths, blobs = trie.tracer.prevList() + if len(paths) != len(seen) || len(blobs) != len(seen) { + t.Fatalf("Unexpected tracked values") + } + for i, path := range paths { + blob := blobs[i] + prev, ok := seen[string(path)] + if !ok { + t.Fatalf("Missing node %v", path) + } + if !bytes.Equal(blob, prev) { + t.Fatalf("Unexpected value path: %v, want: %v, got: %v", path, prev, blob) + } + } + + // Re-open the trie and iterate the trie, ensure nothing will be tracked. + // Iterator will not link any loaded nodes to trie. + trie, _ = New(TrieID(root), db) + trie.tracer = newTracer() + + iter = trie.NodeIterator(nil) + for iter.Next(true) { + } + paths, blobs = trie.tracer.prevList() + if len(paths) != 0 || len(blobs) != 0 { + t.Fatalf("Nothing should be tracked") + } + + // Re-open the trie and generate proof for entries, ensure nothing will + // be tracked. Prover will not link any loaded nodes to trie. + trie, _ = New(TrieID(root), db) + trie.tracer = newTracer() + for _, val := range vals { + trie.Prove([]byte(val.k), 0, rawdb.NewMemoryDatabase()) + } + paths, blobs = trie.tracer.prevList() + if len(paths) != 0 || len(blobs) != 0 { + t.Fatalf("Nothing should be tracked") + } + + // Delete entries from trie, ensure all previous values are correct. + trie, _ = New(TrieID(root), db) + trie.tracer = newTracer() + trie.resolveAndTrack(root.Bytes(), nil) + + for _, val := range vals { + trie.TryDelete([]byte(val.k)) + } + paths, blobs = trie.tracer.prevList() + if len(paths) != len(seen) || len(blobs) != len(seen) { + t.Fatalf("Unexpected tracked values") + } + for i, path := range paths { + blob := blobs[i] + prev, ok := seen[string(path)] + if !ok { + t.Fatalf("Missing node %v", path) + } + if !bytes.Equal(blob, prev) { + t.Fatalf("Unexpected value path: %v, want: %v, got: %v", path, prev, blob) + } + } +} + +func TestDeleteAll(t *testing.T) { + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) + trie.tracer = newTracer() + + // Insert a batch of entries, all the nodes should be marked as inserted + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + root, set := trie.Commit(false) + if err := db.Update(NewWithNodeSet(set)); err != nil { + t.Fatal(err) + } + // Delete entries from trie, ensure all values are detected + trie, _ = New(TrieID(root), db) + trie.tracer = newTracer() + trie.resolveAndTrack(root.Bytes(), nil) + + // Iterate all existent nodes + var ( + it = trie.NodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Hash() != (common.Hash{}) { + nodes[string(it.Path())] = common.CopyBytes(it.NodeBlob()) + } + } + + // Perform deletion to purge the entire trie + for _, val := range vals { + trie.Delete([]byte(val.k)) + } + root, set = trie.Commit(false) + if root != emptyRoot { + t.Fatalf("Invalid trie root %v", root) + } + for path, blob := range set.deletes { + prev, ok := nodes[path] + if !ok { + t.Fatalf("Extra node deleted %v", []byte(path)) + } + if !bytes.Equal(prev, blob) { + t.Fatalf("Unexpected previous value %v", []byte(path)) + } + } + if len(set.deletes) != len(nodes) { + t.Fatalf("Unexpected deletion set") + } +} diff --git a/trie/utils.go b/trie/utils.go new file mode 100644 index 000000000..5dce65cd2 --- /dev/null +++ b/trie/utils.go @@ -0,0 +1,199 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +// tracer tracks the changes of trie nodes. During the trie operations, +// some nodes can be deleted from the trie, while these deleted nodes +// won't be captured by trie.Hasher or trie.Committer. Thus, these deleted +// nodes won't be removed from the disk at all. Tracer is an auxiliary tool +// used to track all insert and delete operations of trie and capture all +// deleted nodes eventually. +// +// The changed nodes can be mainly divided into two categories: the leaf +// node and intermediate node. The former is inserted/deleted by callers +// while the latter is inserted/deleted in order to follow the rule of trie. +// This tool can track all of them no matter the node is embedded in its +// parent or not, but valueNode is never tracked. +// +// Besides, it's also used for recording the original value of the nodes +// when they are resolved from the disk. The pre-value of the nodes will +// be used to construct reverse-diffs in the future. +// +// Note tracer is not thread-safe, callers should be responsible for handling +// the concurrency issues by themselves. +type tracer struct { + insert map[string]struct{} + delete map[string]struct{} + origin map[string][]byte +} + +// newTracer initializes the tracer for capturing trie changes. +func newTracer() *tracer { + return &tracer{ + insert: make(map[string]struct{}), + delete: make(map[string]struct{}), + origin: make(map[string][]byte), + } +} + +// onRead tracks the newly loaded trie node and caches the rlp-encoded blob internally. +// Don't change the value outside of function since it's not deep-copied. +func (t *tracer) onRead(path []byte, val []byte) { + // Tracer isn't used right now, remove this check later. + if t == nil { + return + } + t.origin[string(path)] = val +} + +// onInsert tracks the newly inserted trie node. If it's already in the deletion set +// (resurrected node), then just wipe it from the deletion set as the "untouched". +func (t *tracer) onInsert(path []byte) { + // Tracer isn't used right now, remove this check later. + if t == nil { + return + } + if _, present := t.delete[string(path)]; present { + delete(t.delete, string(path)) + return + } + t.insert[string(path)] = struct{}{} +} + +// onDelete tracks the newly deleted trie node. If it's already +// in the addition set, then just wipe it from the addition set +// as it's untouched. +func (t *tracer) onDelete(path []byte) { + // Tracer isn't used right now, remove this check later. + if t == nil { + return + } + if _, present := t.insert[string(path)]; present { + delete(t.insert, string(path)) + return + } + t.delete[string(path)] = struct{}{} +} + +// insertList returns the tracked inserted trie nodes in list format. +func (t *tracer) insertList() [][]byte { + // Tracer isn't used right now, remove this check later. + if t == nil { + return nil + } + var ret [][]byte + for path := range t.insert { + ret = append(ret, []byte(path)) + } + return ret +} + +// deleteList returns the tracked deleted trie nodes in list format. +func (t *tracer) deleteList() [][]byte { + // Tracer isn't used right now, remove this check later. + if t == nil { + return nil + } + var ret [][]byte + for path := range t.delete { + ret = append(ret, []byte(path)) + } + return ret +} + +// prevList returns the tracked node blobs in list format. +func (t *tracer) prevList() ([][]byte, [][]byte) { + // Tracer isn't used right now, remove this check later. + if t == nil { + return nil, nil + } + var ( + paths [][]byte + blobs [][]byte + ) + for path, blob := range t.origin { + paths = append(paths, []byte(path)) + blobs = append(blobs, blob) + } + return paths, blobs +} + +// getPrev returns the cached original value of the specified node. +func (t *tracer) getPrev(path []byte) []byte { + // Tracer isn't used right now, remove this check later. + if t == nil { + return nil + } + return t.origin[string(path)] +} + +// reset clears the content tracked by tracer. +func (t *tracer) reset() { + // Tracer isn't used right now, remove this check later. + if t == nil { + return + } + t.insert = make(map[string]struct{}) + t.delete = make(map[string]struct{}) + t.origin = make(map[string][]byte) +} + +// copy returns a deep copied tracer instance. +func (t *tracer) copy() *tracer { + // Tracer isn't used right now, remove this check later. + if t == nil { + return nil + } + var ( + insert = make(map[string]struct{}) + delete = make(map[string]struct{}) + origin = make(map[string][]byte) + ) + for key := range t.insert { + insert[key] = struct{}{} + } + for key := range t.delete { + delete[key] = struct{}{} + } + for key, val := range t.origin { + origin[key] = val + } + return &tracer{ + insert: insert, + delete: delete, + origin: origin, + } +} + +// markDeletions puts all tracked deletions into the provided nodeset. +func (t *tracer) markDeletions(set *NodeSet) { + // Tracer isn't used right now, remove this check later. + if t == nil { + return + } + for _, path := range t.deleteList() { + // There are a few possibilities for this scenario(the node is deleted + // but not present in database previously), for example the node was + // embedded in the parent and now deleted from the trie. In this case + // it's noop from database's perspective. + val := t.getPrev(path) + if len(val) == 0 { + continue + } + set.markDeleted(path, val) + } +}