From 5021d36d358ff695075b2821063d36435944bb73 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 May 2023 15:11:04 +0800 Subject: [PATCH] all: port boring changes from pbss (#27176) * all: port boring changes from pbss * core, trie: address comments from martin * trie: minor fixes * core/rawdb: update comment * core, eth, tests, trie: address comments * tests, trie: add extra check when update trie database * trie/triedb/hashdb: degrade the error to warning --- core/blockchain.go | 4 +- core/blockchain_test.go | 2 +- core/rawdb/schema.go | 48 +++- core/state/database.go | 3 +- core/state/iterator_test.go | 12 +- core/state/snapshot/generate.go | 3 +- core/state/snapshot/generate_test.go | 7 +- core/state/state_object.go | 4 +- core/state/statedb.go | 7 +- core/state/sync_test.go | 3 +- eth/protocols/snap/sync_test.go | 19 +- light/postprocess.go | 9 +- light/trie.go | 5 +- tests/fuzzers/stacktrie/trie_fuzzer.go | 4 +- tests/fuzzers/trie/trie-fuzzer.go | 16 +- trie/committer.go | 18 +- trie/database_test.go | 20 +- trie/database_wrap.go | 267 +++++++++++++++++++++++ trie/iterator_test.go | 241 ++++++++++++++------- trie/nodeset.go | 149 ------------- trie/secure_trie.go | 3 +- trie/secure_trie_test.go | 4 +- trie/sync_test.go | 289 +++++++++++++++++++------ trie/tracer.go | 4 +- trie/tracer_test.go | 25 ++- trie/trie.go | 7 +- trie/trie_reader.go | 6 +- trie/trie_test.go | 95 ++++---- trie/{ => triedb/hashdb}/database.go | 205 +++++------------- trie/trienode/node.go | 132 ++++++++++- 30 files changed, 1056 insertions(+), 555 deletions(-) create mode 100644 trie/database_wrap.go delete mode 100644 trie/nodeset.go rename trie/{ => triedb/hashdb}/database.go (81%) diff --git a/core/blockchain.go b/core/blockchain.go index de64ab7c3..659b2f02e 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -982,8 +982,8 @@ func (bc *BlockChain) Stop() { } } // Flush the collected preimages to disk - if err := bc.stateCache.TrieDB().CommitPreimages(); err != nil { - log.Error("Failed to commit trie preimages", "err", err) + if err := bc.stateCache.TrieDB().Close(); err != nil { + log.Error("Failed to close trie db", "err", err) } // Ensure all live cached entries be saved into disk, so that we can skip // cache warmup when node restarts. diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 92a65ab9e..5ec685c99 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -1701,7 +1701,7 @@ func TestTrieForkGC(t *testing.T) { chain.stateCache.TrieDB().Dereference(blocks[len(blocks)-1-i].Root()) chain.stateCache.TrieDB().Dereference(forks[len(blocks)-1-i].Root()) } - if len(chain.stateCache.TrieDB().Nodes()) > 0 { + if nodes, _ := chain.TrieDB().Size(); nodes > 0 { t.Fatalf("stale tries still alive after garbase collection") } } diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index fd5ab1ad4..18722ed5d 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -22,6 +22,7 @@ import ( "encoding/binary" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/metrics" ) @@ -100,7 +101,7 @@ var ( CodePrefix = []byte("c") // CodePrefix + code hash -> account code skeletonHeaderPrefix = []byte("S") // skeletonHeaderPrefix + num (uint64 big endian) -> header - // Path-based trie node scheme. + // Path-based storage scheme of merkle patricia trie. trieNodeAccountPrefix = []byte("A") // trieNodeAccountPrefix + hexPath -> trie node trieNodeStoragePrefix = []byte("O") // trieNodeStoragePrefix + accountHash + hexPath -> trie node @@ -248,3 +249,48 @@ func accountTrieNodeKey(path []byte) []byte { func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte { return append(append(trieNodeStoragePrefix, accountHash.Bytes()...), path...) } + +// IsLegacyTrieNode reports whether a provided database entry is a legacy trie +// node. The characteristics of legacy trie node are: +// - the key length is 32 bytes +// - the key is the hash of val +func IsLegacyTrieNode(key []byte, val []byte) bool { + if len(key) != common.HashLength { + return false + } + return bytes.Equal(key, crypto.Keccak256(val)) +} + +// IsAccountTrieNode reports whether a provided database entry is an account +// trie node in path-based state scheme. +func IsAccountTrieNode(key []byte) (bool, []byte) { + if !bytes.HasPrefix(key, trieNodeAccountPrefix) { + return false, nil + } + // The remaining key should only consist a hex node path + // whose length is in the range 0 to 64 (64 is excluded + // since leaves are always wrapped with shortNode). + if len(key) >= len(trieNodeAccountPrefix)+common.HashLength*2 { + return false, nil + } + return true, key[len(trieNodeAccountPrefix):] +} + +// IsStorageTrieNode reports whether a provided database entry is a storage +// trie node in path-based state scheme. +func IsStorageTrieNode(key []byte) (bool, common.Hash, []byte) { + if !bytes.HasPrefix(key, trieNodeStoragePrefix) { + return false, common.Hash{}, nil + } + // The remaining key consists of 2 parts: + // - 32 bytes account hash + // - hex node path whose length is in the range 0 to 64 + if len(key) < len(trieNodeStoragePrefix)+common.HashLength { + return false, common.Hash{}, nil + } + if len(key) >= len(trieNodeStoragePrefix)+common.HashLength+common.HashLength*2 { + return false, common.Hash{}, nil + } + accountHash := common.BytesToHash(key[len(trieNodeStoragePrefix) : len(trieNodeStoragePrefix)+common.HashLength]) + return true, accountHash, key[len(trieNodeStoragePrefix)+common.HashLength:] +} diff --git a/core/state/database.go b/core/state/database.go index 82f620b46..ace462165 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) const ( @@ -109,7 +110,7 @@ type Trie interface { // The returned nodeset can be nil if the trie is clean(nothing to commit). // Once the trie is committed, it's not usable anymore. A new trie must // be created with new root and updated trie database for following usage - Commit(collectLeaf bool) (common.Hash, *trie.NodeSet) + Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) // NodeIterator returns an iterator that returns nodes of the trie. Iteration // starts at the key after the given start key. diff --git a/core/state/iterator_test.go b/core/state/iterator_test.go index ab06cb422..24b192c26 100644 --- a/core/state/iterator_test.go +++ b/core/state/iterator_test.go @@ -21,6 +21,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" ) // Tests that the node iterator indeed walks over the entire database contents. @@ -85,9 +86,18 @@ func TestNodeIteratorCoverage(t *testing.T) { // database entry belongs to a trie node or not. func isTrieNode(scheme string, key, val []byte) (bool, common.Hash) { if scheme == rawdb.HashScheme { - if len(key) == common.HashLength { + if rawdb.IsLegacyTrieNode(key, val) { return true, common.BytesToHash(key) } + } else { + ok, _ := rawdb.IsAccountTrieNode(key) + if ok { + return true, crypto.Keccak256Hash(val) + } + ok, _, _ = rawdb.IsStorageTrieNode(key) + if ok { + return true, crypto.Keccak256Hash(val) + } } return false, common.Hash{} } diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 68c2f574b..4703b45df 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -32,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -363,7 +364,7 @@ func (dl *diskLayer) generateRange(ctx *generatorContext, trieId *trie.ID, prefi } root, nodes := snapTrie.Commit(false) if nodes != nil { - tdb.Update(trie.NewWithNodeSet(nodes)) + tdb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) tdb.Commit(root, false) } resolver = func(owner common.Hash, path []byte, hash common.Hash) []byte { diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 546132e7d..265515a9c 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -144,7 +145,7 @@ type testHelper struct { diskdb ethdb.Database triedb *trie.Database accTrie *trie.StateTrie - nodes *trie.MergedNodeSet + nodes *trienode.MergedNodeSet } func newHelper() *testHelper { @@ -155,7 +156,7 @@ func newHelper() *testHelper { diskdb: diskdb, triedb: triedb, accTrie: accTrie, - nodes: trie.NewMergedNodeSet(), + nodes: trienode.NewMergedNodeSet(), } } @@ -203,7 +204,7 @@ func (t *testHelper) Commit() common.Hash { if nodes != nil { t.nodes.Merge(nodes) } - t.triedb.Update(t.nodes) + t.triedb.Update(root, types.EmptyRootHash, t.nodes) t.triedb.Commit(root, false) return root } diff --git a/core/state/state_object.go b/core/state/state_object.go index 95fe6f52f..1e28b4c12 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -28,7 +28,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" - "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) type Code []byte @@ -350,7 +350,7 @@ func (s *stateObject) updateRoot(db Database) { // commitTrie submits the storage changes into the storage trie and re-computes // the root. Besides, all trie changes will be collected in a nodeset and returned. -func (s *stateObject) commitTrie(db Database) (*trie.NodeSet, error) { +func (s *stateObject) commitTrie(db Database) (*trienode.NodeSet, error) { tr, err := s.updateTrie(db) if err != nil { return nil, err diff --git a/core/state/statedb.go b/core/state/statedb.go index a4e22baaf..9dc3b9839 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -34,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) type revision struct { @@ -971,7 +972,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { accountTrieNodesDeleted int storageTrieNodesUpdated int storageTrieNodesDeleted int - nodes = trie.NewMergedNodeSet() + nodes = trienode.NewMergedNodeSet() codeWriter = s.db.DiskDB().NewBatch() ) for addr := range s.stateObjectsDirty { @@ -986,7 +987,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err != nil { return common.Hash{}, err } - // Merge the dirty nodes of storage trie into global set + // Merge the dirty nodes of storage trie into global set. if set != nil { if err := nodes.Merge(set); err != nil { return common.Hash{}, err @@ -1071,7 +1072,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } if root != origin { start := time.Now() - if err := s.db.TrieDB().Update(nodes); err != nil { + if err := s.db.TrieDB().Update(root, origin, nodes); err != nil { return common.Hash{}, err } s.originalRoot = root diff --git a/core/state/sync_test.go b/core/state/sync_test.go index 090d55e47..87621f49c 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -602,7 +602,8 @@ func TestIncompleteStateSync(t *testing.T) { if len(nodeQueue) > 0 { results := make([]trie.NodeSyncResult, 0, len(nodeQueue)) for path, element := range nodeQueue { - data, err := srcDb.TrieDB().Node(element.hash) + owner, inner := trie.ResolvePath([]byte(element.path)) + data, err := srcDb.TrieDB().Reader(srcRoot).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x", element.hash) } diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 6a3b482d5..ee2ef9581 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -36,6 +36,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -1389,7 +1390,7 @@ func makeAccountTrieNoStorage(n int) (string, *trie.Trie, entrySlice) { // Commit the state changes into db and re-create the trie // for accessing later. root, nodes := accTrie.Commit(false) - db.Update(trie.NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) accTrie, _ = trie.New(trie.StateTrieID(root), db) return db.Scheme(), accTrie, entries @@ -1451,7 +1452,7 @@ func makeBoundaryAccountTrie(n int) (string, *trie.Trie, entrySlice) { // Commit the state changes into db and re-create the trie // for accessing later. root, nodes := accTrie.Commit(false) - db.Update(trie.NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) accTrie, _ = trie.New(trie.StateTrieID(root), db) return db.Scheme(), accTrie, entries @@ -1467,7 +1468,7 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) storageRoots = make(map[common.Hash]common.Hash) storageTries = make(map[common.Hash]*trie.Trie) storageEntries = make(map[common.Hash]entrySlice) - nodes = trie.NewMergedNodeSet() + nodes = trienode.NewMergedNodeSet() ) // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { @@ -1500,7 +1501,7 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) nodes.Merge(set) // Commit gathered dirty nodes into database - db.Update(nodes) + db.Update(root, types.EmptyRootHash, nodes) // Re-create tries with new root accTrie, _ = trie.New(trie.StateTrieID(root), db) @@ -1522,7 +1523,7 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (strin storageRoots = make(map[common.Hash]common.Hash) storageTries = make(map[common.Hash]*trie.Trie) storageEntries = make(map[common.Hash]entrySlice) - nodes = trie.NewMergedNodeSet() + nodes = trienode.NewMergedNodeSet() ) // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { @@ -1534,7 +1535,7 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (strin // Make a storage trie var ( stRoot common.Hash - stNodes *trie.NodeSet + stNodes *trienode.NodeSet stEntries entrySlice ) if boundary { @@ -1565,7 +1566,7 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (strin nodes.Merge(set) // Commit gathered dirty nodes into database - db.Update(nodes) + db.Update(root, types.EmptyRootHash, nodes) // Re-create tries with new root accTrie, err := trie.New(trie.StateTrieID(root), db) @@ -1587,7 +1588,7 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (strin // makeStorageTrieWithSeed fills a storage trie with n items, returning the // not-yet-committed trie and the sorted entries. The seeds can be used to ensure // that tries are unique. -func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Database) (common.Hash, *trie.NodeSet, entrySlice) { +func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Database) (common.Hash, *trienode.NodeSet, entrySlice) { trie, _ := trie.New(trie.StorageTrieID(common.Hash{}, owner, common.Hash{}), db) var entries entrySlice for i := uint64(1); i <= n; i++ { @@ -1610,7 +1611,7 @@ func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Databas // makeBoundaryStorageTrie constructs a storage trie. Instead of filling // storage slots normally, this function will fill a few slots which have // boundary hash. -func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (common.Hash, *trie.NodeSet, entrySlice) { +func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (common.Hash, *trienode.NodeSet, entrySlice) { var ( entries entrySlice boundaries []common.Hash diff --git a/light/postprocess.go b/light/postprocess.go index 763ba2752..38f499526 100644 --- a/light/postprocess.go +++ b/light/postprocess.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) // IndexerConfig includes a set of configs for chain indexers. @@ -134,6 +135,7 @@ type ChtIndexerBackend struct { section, sectionSize uint64 lastHash common.Hash trie *trie.Trie + originRoot common.Hash } // NewChtIndexer creates a Cht chain indexer @@ -191,6 +193,7 @@ func (c *ChtIndexerBackend) Reset(ctx context.Context, section uint64, lastSecti } } c.section = section + c.originRoot = root return err } @@ -214,7 +217,7 @@ func (c *ChtIndexerBackend) Commit() error { root, nodes := c.trie.Commit(false) // Commit trie changes into trie database in case it's not nil. if nodes != nil { - if err := c.triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + if err := c.triedb.Update(root, c.originRoot, trienode.NewWithNodeSet(nodes)); err != nil { return err } if err := c.triedb.Commit(root, false); err != nil { @@ -332,6 +335,7 @@ type BloomTrieIndexerBackend struct { size uint64 bloomTrieRatio uint64 trie *trie.Trie + originRoot common.Hash sectionHeads []common.Hash } @@ -413,6 +417,7 @@ func (b *BloomTrieIndexerBackend) Reset(ctx context.Context, section uint64, las } } b.section = section + b.originRoot = root return err } @@ -463,7 +468,7 @@ func (b *BloomTrieIndexerBackend) Commit() error { root, nodes := b.trie.Commit(false) // Commit trie changes into trie database in case it's not nil. if nodes != nil { - if err := b.triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + if err := b.triedb.Update(root, b.originRoot, trienode.NewWithNodeSet(nodes)); err != nil { return err } if err := b.triedb.Commit(root, false); err != nil { diff --git a/light/trie.go b/light/trie.go index 38dd6b5c2..6d802a5fb 100644 --- a/light/trie.go +++ b/light/trie.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -156,7 +157,7 @@ func (t *odrTrie) DeleteStorage(_ common.Address, key []byte) error { }) } -// TryDeleteAccount abstracts an account deletion from the trie. +// DeleteAccount abstracts an account deletion from the trie. func (t *odrTrie) DeleteAccount(address common.Address) error { key := crypto.Keccak256(address.Bytes()) return t.do(key, func() error { @@ -164,7 +165,7 @@ func (t *odrTrie) DeleteAccount(address common.Address) error { }) } -func (t *odrTrie) Commit(collectLeaf bool) (common.Hash, *trie.NodeSet) { +func (t *odrTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) { if t.trie == nil { return t.id.Root, nil } diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go index 809dba8ce..0099e9e16 100644 --- a/tests/fuzzers/stacktrie/trie_fuzzer.go +++ b/tests/fuzzers/stacktrie/trie_fuzzer.go @@ -27,9 +27,11 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -184,7 +186,7 @@ func (f *fuzzer) fuzz() int { // Flush trie -> database rootA, nodes := trieA.Commit(false) if nodes != nil { - dbA.Update(trie.NewWithNodeSet(nodes)) + dbA.Update(rootA, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) } // Flush memdb -> disk (sponge) dbA.Commit(rootA, false) diff --git a/tests/fuzzers/trie/trie-fuzzer.go b/tests/fuzzers/trie/trie-fuzzer.go index c0cbceff3..41baf67ec 100644 --- a/tests/fuzzers/trie/trie-fuzzer.go +++ b/tests/fuzzers/trie/trie-fuzzer.go @@ -22,7 +22,9 @@ import ( "fmt" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" ) // randTest performs random trie operations. @@ -139,11 +141,12 @@ func Fuzz(input []byte) int { } func runRandTest(rt randTest) error { - triedb := trie.NewDatabase(rawdb.NewMemoryDatabase()) - - tr := trie.NewEmpty(triedb) - values := make(map[string]string) // tracks content of the trie - + var ( + triedb = trie.NewDatabase(rawdb.NewMemoryDatabase()) + tr = trie.NewEmpty(triedb) + origin = types.EmptyRootHash + values = make(map[string]string) // tracks content of the trie + ) for i, step := range rt { switch step.op { case opUpdate: @@ -163,7 +166,7 @@ func runRandTest(rt randTest) error { case opCommit: hash, nodes := tr.Commit(false) if nodes != nil { - if err := triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + if err := triedb.Update(hash, origin, trienode.NewWithNodeSet(nodes)); err != nil { return err } } @@ -172,6 +175,7 @@ func runRandTest(rt randTest) error { return err } tr = newtr + origin = hash case opItercheckhash: checktr := trie.NewEmpty(triedb) it := trie.NewIterator(tr.NodeIterator(nil)) diff --git a/trie/committer.go b/trie/committer.go index 805d4a314..e825287fd 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -23,23 +23,17 @@ import ( "github.com/ethereum/go-ethereum/trie/trienode" ) -// leaf represents a trie leaf node -type leaf struct { - blob []byte // raw blob of leaf - parent common.Hash // the hash of parent node -} - // committer is the tool used for the trie Commit operation. The committer will // capture all dirty nodes during the commit process and keep them cached in // insertion order. type committer struct { - nodes *NodeSet + nodes *trienode.NodeSet tracer *tracer collectLeaf bool } // newCommitter creates a new committer or picks one from the pool. -func newCommitter(nodeset *NodeSet, tracer *tracer, collectLeaf bool) *committer { +func newCommitter(nodeset *trienode.NodeSet, tracer *tracer, collectLeaf bool) *committer { return &committer{ nodes: nodeset, tracer: tracer, @@ -139,7 +133,7 @@ func (c *committer) store(path []byte, n node) node { // deleted only if the node was existent in database before. prev, ok := c.tracer.accessList[string(path)] if ok { - c.nodes.addNode(path, trienode.NewWithPrev(common.Hash{}, nil, prev)) + c.nodes.AddNode(path, trienode.NewWithPrev(common.Hash{}, nil, prev)) } return n } @@ -152,7 +146,7 @@ func (c *committer) store(path []byte, n node) node { c.tracer.accessList[string(path)], ) ) - c.nodes.addNode(path, node) + c.nodes.AddNode(path, node) // Collect the corresponding leaf node if it's required. We don't check // full node since it's impossible to store value in fullNode. The key @@ -160,7 +154,7 @@ func (c *committer) store(path []byte, n node) node { if c.collectLeaf { if sn, ok := n.(*shortNode); ok { if val, ok := sn.Val.(valueNode); ok { - c.nodes.addLeaf(&leaf{blob: val, parent: nhash}) + c.nodes.AddLeaf(nhash, val) } } } @@ -172,7 +166,7 @@ type mptResolver struct{} // ForEach implements childResolver, decodes the provided node and // traverses the children inside. -func (resolver mptResolver) forEach(node []byte, onChild func(common.Hash)) { +func (resolver mptResolver) ForEach(node []byte, onChild func(common.Hash)) { forGatherChildren(mustDecodeNodeUnsafe(nil, node), onChild) } diff --git a/trie/database_test.go b/trie/database_test.go index 54d752947..cad462f73 100644 --- a/trie/database_test.go +++ b/trie/database_test.go @@ -17,17 +17,19 @@ package trie import ( - "testing" - - "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" ) -// Tests that the trie database returns a missing trie node error if attempting -// to retrieve the meta root. -func TestDatabaseMetarootFetch(t *testing.T) { - db := NewDatabase(rawdb.NewMemoryDatabase()) - if _, err := db.Node(common.Hash{}); err == nil { - t.Fatalf("metaroot retrieval succeeded") +// newTestDatabase initializes the trie database with specified scheme. +func newTestDatabase(diskdb ethdb.Database, scheme string) *Database { + db := prepare(diskdb, nil) + if scheme == rawdb.HashScheme { + db.backend = hashdb.New(diskdb, db.cleans, mptResolver{}) } + //} else { + // db.backend = snap.New(diskdb, db.cleans, nil) + //} + return db } diff --git a/trie/database_wrap.go b/trie/database_wrap.go new file mode 100644 index 000000000..b05f56847 --- /dev/null +++ b/trie/database_wrap.go @@ -0,0 +1,267 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "errors" + "runtime" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// Config defines all necessary options for database. +type Config struct { + Cache int // Memory allowance (MB) to use for caching trie nodes in memory + Journal string // Journal of clean cache to survive node restarts + Preimages bool // Flag whether the preimage of trie key is recorded +} + +// backend defines the methods needed to access/update trie nodes in different +// state scheme. +type backend interface { + // Scheme returns the identifier of used storage scheme. + Scheme() string + + // Initialized returns an indicator if the state data is already initialized + // according to the state scheme. + Initialized(genesisRoot common.Hash) bool + + // Size returns the current storage size of the memory cache in front of the + // persistent database layer. + Size() common.StorageSize + + // Update performs a state transition by committing dirty nodes contained + // in the given set in order to update state from the specified parent to + // the specified root. + Update(root common.Hash, parent common.Hash, nodes *trienode.MergedNodeSet) error + + // Commit writes all relevant trie nodes belonging to the specified state + // to disk. Report specifies whether logs will be displayed in info level. + Commit(root common.Hash, report bool) error + + // Close closes the trie database backend and releases all held resources. + Close() error +} + +// Database is the wrapper of the underlying backend which is shared by different +// types of node backend as an entrypoint. It's responsible for all interactions +// relevant with trie nodes and node preimages. +type Database struct { + config *Config // Configuration for trie database + diskdb ethdb.Database // Persistent database to store the snapshot + cleans *fastcache.Cache // Megabytes permitted using for read caches + preimages *preimageStore // The store for caching preimages + backend backend // The backend for managing trie nodes +} + +// prepare initializes the database with provided configs, but the +// database backend is still left as nil. +func prepare(diskdb ethdb.Database, config *Config) *Database { + var cleans *fastcache.Cache + if config != nil && config.Cache > 0 { + if config.Journal == "" { + cleans = fastcache.New(config.Cache * 1024 * 1024) + } else { + cleans = fastcache.LoadFromFileOrNew(config.Journal, config.Cache*1024*1024) + } + } + var preimages *preimageStore + if config != nil && config.Preimages { + preimages = newPreimageStore(diskdb) + } + return &Database{ + config: config, + diskdb: diskdb, + cleans: cleans, + preimages: preimages, + } +} + +// NewDatabase initializes the trie database with default settings, namely +// the legacy hash-based scheme is used by default. +func NewDatabase(diskdb ethdb.Database) *Database { + return NewDatabaseWithConfig(diskdb, nil) +} + +// NewDatabaseWithConfig initializes the trie database with provided configs. +// The path-based scheme is not activated yet, always initialized with legacy +// hash-based scheme by default. +func NewDatabaseWithConfig(diskdb ethdb.Database, config *Config) *Database { + db := prepare(diskdb, config) + db.backend = hashdb.New(diskdb, db.cleans, mptResolver{}) + return db +} + +// Reader returns a reader for accessing all trie nodes with provided state root. +// Nil is returned in case the state is not available. +func (db *Database) Reader(blockRoot common.Hash) Reader { + return db.backend.(*hashdb.Database).Reader(blockRoot) +} + +// Update performs a state transition by committing dirty nodes contained in the +// given set in order to update state from the specified parent to the specified +// root. The held pre-images accumulated up to this point will be flushed in case +// the size exceeds the threshold. +func (db *Database) Update(root common.Hash, parent common.Hash, nodes *trienode.MergedNodeSet) error { + if db.preimages != nil { + db.preimages.commit(false) + } + return db.backend.Update(root, parent, nodes) +} + +// Commit iterates over all the children of a particular node, writes them out +// to disk. As a side effect, all pre-images accumulated up to this point are +// also written. +func (db *Database) Commit(root common.Hash, report bool) error { + if db.preimages != nil { + db.preimages.commit(true) + } + return db.backend.Commit(root, report) +} + +// Size returns the storage size of dirty trie nodes in front of the persistent +// database and the size of cached preimages. +func (db *Database) Size() (common.StorageSize, common.StorageSize) { + var ( + storages common.StorageSize + preimages common.StorageSize + ) + storages = db.backend.Size() + if db.preimages != nil { + preimages = db.preimages.size() + } + return storages, preimages +} + +// Initialized returns an indicator if the state data is already initialized +// according to the state scheme. +func (db *Database) Initialized(genesisRoot common.Hash) bool { + return db.backend.Initialized(genesisRoot) +} + +// Scheme returns the node scheme used in the database. +func (db *Database) Scheme() string { + return db.backend.Scheme() +} + +// Close flushes the dangling preimages to disk and closes the trie database. +// It is meant to be called when closing the blockchain object, so that all +// resources held can be released correctly. +func (db *Database) Close() error { + if db.preimages != nil { + db.preimages.commit(true) + } + return db.backend.Close() +} + +// saveCache saves clean state cache to given directory path +// using specified CPU cores. +func (db *Database) saveCache(dir string, threads int) error { + if db.cleans == nil { + return nil + } + log.Info("Writing clean trie cache to disk", "path", dir, "threads", threads) + + start := time.Now() + err := db.cleans.SaveToFileConcurrent(dir, threads) + if err != nil { + log.Error("Failed to persist clean trie cache", "error", err) + return err + } + log.Info("Persisted the clean trie cache", "path", dir, "elapsed", common.PrettyDuration(time.Since(start))) + return nil +} + +// SaveCache atomically saves fast cache data to the given dir using all +// available CPU cores. +func (db *Database) SaveCache(dir string) error { + return db.saveCache(dir, runtime.GOMAXPROCS(0)) +} + +// SaveCachePeriodically atomically saves fast cache data to the given dir with +// the specified interval. All dump operation will only use a single CPU core. +func (db *Database) SaveCachePeriodically(dir string, interval time.Duration, stopCh <-chan struct{}) { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + db.saveCache(dir, 1) + case <-stopCh: + return + } + } +} + +// Cap iteratively flushes old but still referenced trie nodes until the total +// memory usage goes below the given threshold. The held pre-images accumulated +// up to this point will be flushed in case the size exceeds the threshold. +// +// It's only supported by hash-based database and will return an error for others. +func (db *Database) Cap(limit common.StorageSize) error { + hdb, ok := db.backend.(*hashdb.Database) + if !ok { + return errors.New("not supported") + } + if db.preimages != nil { + db.preimages.commit(false) + } + return hdb.Cap(limit) +} + +// Reference adds a new reference from a parent node to a child node. This function +// is used to add reference between internal trie node and external node(e.g. storage +// trie root), all internal trie nodes are referenced together by database itself. +// +// It's only supported by hash-based database and will return an error for others. +func (db *Database) Reference(root common.Hash, parent common.Hash) error { + hdb, ok := db.backend.(*hashdb.Database) + if !ok { + return errors.New("not supported") + } + hdb.Reference(root, parent) + return nil +} + +// Dereference removes an existing reference from a root node. It's only +// supported by hash-based database and will return an error for others. +func (db *Database) Dereference(root common.Hash) error { + hdb, ok := db.backend.(*hashdb.Database) + if !ok { + return errors.New("not supported") + } + hdb.Dereference(root) + return nil +} + +// Node retrieves the rlp-encoded node blob with provided node hash. It's +// only supported by hash-based database and will return an error for others. +// Note, this function should be deprecated once ETH66 is deprecated. +func (db *Database) Node(hash common.Hash) ([]byte, error) { + hdb, ok := db.backend.(*hashdb.Database) + if !ok { + return nil, errors.New("not supported") + } + return hdb.Node(hash) +} diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 6dc38db6f..fca8c823b 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -25,9 +25,11 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/trie/trienode" ) func TestEmptyIterator(t *testing.T) { @@ -61,7 +63,7 @@ func TestIterator(t *testing.T) { trie.MustUpdate([]byte(val.k), []byte(val.v)) } root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) found := make(map[string]string) @@ -115,39 +117,61 @@ func TestIteratorLargeData(t *testing.T) { } } +type iterationElement struct { + hash common.Hash + path []byte + blob []byte +} + // Tests that the node iterator indeed walks over the entire database contents. func TestNodeIteratorCoverage(t *testing.T) { + testNodeIteratorCoverage(t, rawdb.HashScheme) + //testNodeIteratorCoverage(t, rawdb.PathScheme) +} + +func testNodeIteratorCoverage(t *testing.T, scheme string) { // Create some arbitrary test trie to iterate - db, trie, _ := makeTestTrie() + db, nodeDb, trie, _ := makeTestTrie(scheme) // Gather all the node hashes found by the iterator - hashes := make(map[common.Hash]struct{}) + var elements = make(map[common.Hash]iterationElement) for it := trie.NodeIterator(nil); it.Next(true); { if it.Hash() != (common.Hash{}) { - hashes[it.Hash()] = struct{}{} - } - } - // Cross check the hashes and the database itself - for hash := range hashes { - if _, err := db.Node(hash); err != nil { - t.Errorf("failed to retrieve reported node %x: %v", hash, err) - } - } - for hash, obj := range db.dirties { - if obj != nil && hash != (common.Hash{}) { - if _, ok := hashes[hash]; !ok { - t.Errorf("state entry not reported %x", hash) + elements[it.Hash()] = iterationElement{ + hash: it.Hash(), + path: common.CopyBytes(it.Path()), + blob: common.CopyBytes(it.NodeBlob()), } } } - it := db.diskdb.NewIterator(nil, nil) + // Cross check the hashes and the database itself + for _, element := range elements { + if blob, err := nodeDb.Reader(trie.Hash()).Node(common.Hash{}, element.path, element.hash); err != nil { + t.Errorf("failed to retrieve reported node %x: %v", element.hash, err) + } else if !bytes.Equal(blob, element.blob) { + t.Errorf("node blob is different, want %v got %v", element.blob, blob) + } + } + var ( + count int + it = db.NewIterator(nil, nil) + ) for it.Next() { - key := it.Key() - if _, ok := hashes[common.BytesToHash(key)]; !ok { - t.Errorf("state entry not reported %x", key) + res, _, _ := isTrieNode(nodeDb.Scheme(), it.Key(), it.Value()) + if !res { + continue + } + count += 1 + if elem, ok := elements[crypto.Keccak256Hash(it.Value())]; !ok { + t.Error("state entry not reported") + } else if !bytes.Equal(it.Value(), elem.blob) { + t.Errorf("node blob is different, want %v got %v", elem.blob, it.Value()) } } it.Release() + if count != len(elements) { + t.Errorf("state entry is mismatched %d %d", count, len(elements)) + } } type kvs struct{ k, v string } @@ -223,7 +247,7 @@ func TestDifferenceIterator(t *testing.T) { triea.MustUpdate([]byte(val.k), []byte(val.v)) } rootA, nodesA := triea.Commit(false) - dba.Update(NewWithNodeSet(nodesA)) + dba.Update(rootA, types.EmptyRootHash, trienode.NewWithNodeSet(nodesA)) triea, _ = New(TrieID(rootA), dba) dbb := NewDatabase(rawdb.NewMemoryDatabase()) @@ -232,7 +256,7 @@ func TestDifferenceIterator(t *testing.T) { trieb.MustUpdate([]byte(val.k), []byte(val.v)) } rootB, nodesB := trieb.Commit(false) - dbb.Update(NewWithNodeSet(nodesB)) + dbb.Update(rootB, types.EmptyRootHash, trienode.NewWithNodeSet(nodesB)) trieb, _ = New(TrieID(rootB), dbb) found := make(map[string]string) @@ -265,7 +289,7 @@ func TestUnionIterator(t *testing.T) { triea.MustUpdate([]byte(val.k), []byte(val.v)) } rootA, nodesA := triea.Commit(false) - dba.Update(NewWithNodeSet(nodesA)) + dba.Update(rootA, types.EmptyRootHash, trienode.NewWithNodeSet(nodesA)) triea, _ = New(TrieID(rootA), dba) dbb := NewDatabase(rawdb.NewMemoryDatabase()) @@ -274,7 +298,7 @@ func TestUnionIterator(t *testing.T) { trieb.MustUpdate([]byte(val.k), []byte(val.v)) } rootB, nodesB := trieb.Commit(false) - dbb.Update(NewWithNodeSet(nodesB)) + dbb.Update(rootB, types.EmptyRootHash, trienode.NewWithNodeSet(nodesB)) trieb, _ = New(TrieID(rootB), dbb) di, _ := NewUnionIterator([]NodeIterator{triea.NodeIterator(nil), trieb.NodeIterator(nil)}) @@ -320,79 +344,98 @@ func TestIteratorNoDups(t *testing.T) { } // This test checks that nodeIterator.Next can be retried after inserting missing trie nodes. -func TestIteratorContinueAfterErrorDisk(t *testing.T) { testIteratorContinueAfterError(t, false) } -func TestIteratorContinueAfterErrorMemonly(t *testing.T) { testIteratorContinueAfterError(t, true) } +func TestIteratorContinueAfterError(t *testing.T) { + testIteratorContinueAfterError(t, false, rawdb.HashScheme) + testIteratorContinueAfterError(t, true, rawdb.HashScheme) + // testIteratorContinueAfterError(t, false, rawdb.PathScheme) + // testIteratorContinueAfterError(t, true, rawdb.PathScheme) +} -func testIteratorContinueAfterError(t *testing.T, memonly bool) { +func testIteratorContinueAfterError(t *testing.T, memonly bool, scheme string) { diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) + tdb := newTestDatabase(diskdb, scheme) - tr := NewEmpty(triedb) + tr := NewEmpty(tdb) for _, val := range testdata1 { tr.MustUpdate([]byte(val.k), []byte(val.v)) } - _, nodes := tr.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) + root, nodes := tr.Commit(false) + tdb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) if !memonly { - triedb.Commit(tr.Hash(), false) + tdb.Commit(root, false) } + tr, _ = New(TrieID(root), tdb) wantNodeCount := checkIteratorNoDups(t, tr.NodeIterator(nil), nil) var ( - diskKeys [][]byte - memKeys []common.Hash + paths [][]byte + hashes []common.Hash ) if memonly { - memKeys = triedb.Nodes() + for path, n := range nodes.Nodes { + paths = append(paths, []byte(path)) + hashes = append(hashes, n.Hash) + } } else { it := diskdb.NewIterator(nil, nil) for it.Next() { - diskKeys = append(diskKeys, it.Key()) + ok, path, hash := isTrieNode(tdb.Scheme(), it.Key(), it.Value()) + if !ok { + continue + } + paths = append(paths, path) + hashes = append(hashes, hash) } it.Release() } for i := 0; i < 20; i++ { // Create trie that will load all nodes from DB. - tr, _ := New(TrieID(tr.Hash()), triedb) + tr, _ := New(TrieID(tr.Hash()), tdb) // Remove a random node from the database. It can't be the root node // because that one is already loaded. var ( - rkey common.Hash - rval []byte - robj *cachedNode + rval []byte + rpath []byte + rhash common.Hash ) for { if memonly { - rkey = memKeys[rand.Intn(len(memKeys))] + rpath = paths[rand.Intn(len(paths))] + n := nodes.Nodes[string(rpath)] + if n == nil { + continue + } + rhash = n.Hash } else { - copy(rkey[:], diskKeys[rand.Intn(len(diskKeys))]) + index := rand.Intn(len(paths)) + rpath = paths[index] + rhash = hashes[index] } - if rkey != tr.Hash() { + if rhash != tr.Hash() { break } } if memonly { - robj = triedb.dirties[rkey] - delete(triedb.dirties, rkey) + tr.reader.banned = map[string]struct{}{string(rpath): {}} } else { - rval, _ = diskdb.Get(rkey[:]) - diskdb.Delete(rkey[:]) + rval = rawdb.ReadTrieNode(diskdb, common.Hash{}, rpath, rhash, tdb.Scheme()) + rawdb.DeleteTrieNode(diskdb, common.Hash{}, rpath, rhash, tdb.Scheme()) } // Iterate until the error is hit. seen := make(map[string]bool) it := tr.NodeIterator(nil) checkIteratorNoDups(t, it, seen) missing, ok := it.Error().(*MissingNodeError) - if !ok || missing.NodeHash != rkey { + if !ok || missing.NodeHash != rhash { t.Fatal("didn't hit missing node, got", it.Error()) } // Add the node back and continue iteration. if memonly { - triedb.dirties[rkey] = robj + delete(tr.reader.banned, string(rpath)) } else { - diskdb.Put(rkey[:], rval) + rawdb.WriteTrieNode(diskdb, common.Hash{}, rpath, rhash, rval, tdb.Scheme()) } checkIteratorNoDups(t, it, seen) if it.Error() != nil { @@ -407,42 +450,48 @@ func testIteratorContinueAfterError(t *testing.T, memonly bool) { // Similar to the test above, this one checks that failure to create nodeIterator at a // certain key prefix behaves correctly when Next is called. The expectation is that Next // should retry seeking before returning true for the first time. -func TestIteratorContinueAfterSeekErrorDisk(t *testing.T) { - testIteratorContinueAfterSeekError(t, false) -} -func TestIteratorContinueAfterSeekErrorMemonly(t *testing.T) { - testIteratorContinueAfterSeekError(t, true) +func TestIteratorContinueAfterSeekError(t *testing.T) { + testIteratorContinueAfterSeekError(t, false, rawdb.HashScheme) + testIteratorContinueAfterSeekError(t, true, rawdb.HashScheme) + // testIteratorContinueAfterSeekError(t, false, rawdb.PathScheme) + // testIteratorContinueAfterSeekError(t, true, rawdb.PathScheme) } -func testIteratorContinueAfterSeekError(t *testing.T, memonly bool) { +func testIteratorContinueAfterSeekError(t *testing.T, memonly bool, scheme string) { // Commit test trie to db, then remove the node containing "bars". + var ( + barNodePath []byte + barNodeHash = common.HexToHash("05041990364eb72fcb1127652ce40d8bab765f2bfe53225b1170d276cc101c2e") + ) diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) - + triedb := newTestDatabase(diskdb, scheme) ctr := NewEmpty(triedb) for _, val := range testdata1 { ctr.MustUpdate([]byte(val.k), []byte(val.v)) } root, nodes := ctr.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) + for path, n := range nodes.Nodes { + if n.Hash == barNodeHash { + barNodePath = []byte(path) + break + } + } + triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) if !memonly { triedb.Commit(root, false) } - barNodeHash := common.HexToHash("05041990364eb72fcb1127652ce40d8bab765f2bfe53225b1170d276cc101c2e") var ( barNodeBlob []byte - barNodeObj *cachedNode ) + tr, _ := New(TrieID(root), triedb) if memonly { - barNodeObj = triedb.dirties[barNodeHash] - delete(triedb.dirties, barNodeHash) + tr.reader.banned = map[string]struct{}{string(barNodePath): {}} } else { - barNodeBlob, _ = diskdb.Get(barNodeHash[:]) - diskdb.Delete(barNodeHash[:]) + barNodeBlob = rawdb.ReadTrieNode(diskdb, common.Hash{}, barNodePath, barNodeHash, triedb.Scheme()) + rawdb.DeleteTrieNode(diskdb, common.Hash{}, barNodePath, barNodeHash, triedb.Scheme()) } // Create a new iterator that seeks to "bars". Seeking can't proceed because // the node is missing. - tr, _ := New(TrieID(root), triedb) it := tr.NodeIterator([]byte("bars")) missing, ok := it.Error().(*MissingNodeError) if !ok { @@ -452,9 +501,9 @@ func testIteratorContinueAfterSeekError(t *testing.T, memonly bool) { } // Reinsert the missing node. if memonly { - triedb.dirties[barNodeHash] = barNodeObj + delete(tr.reader.banned, string(barNodePath)) } else { - diskdb.Put(barNodeHash[:], barNodeBlob) + rawdb.WriteTrieNode(diskdb, common.Hash{}, barNodePath, barNodeHash, barNodeBlob, triedb.Scheme()) } // Check that iteration produces the right set of values. if err := checkIteratorOrder(testdata1[2:], NewIterator(it)); err != nil { @@ -475,6 +524,11 @@ func checkIteratorNoDups(t *testing.T, it NodeIterator, seen map[string]bool) in return len(seen) } +func TestIteratorNodeBlob(t *testing.T) { + testIteratorNodeBlob(t, rawdb.HashScheme) + //testIteratorNodeBlob(t, rawdb.PathScheme) +} + type loggingDb struct { getCount uint64 backend ethdb.KeyValueStore @@ -542,8 +596,8 @@ func makeLargeTestTrie() (*Database, *StateTrie, *loggingDb) { val = crypto.Keccak256(val) trie.MustUpdate(key, val) } - _, nodes := trie.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) + root, nodes := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) // Return the generated trie return triedb, trie, logDb } @@ -562,10 +616,10 @@ func TestNodeIteratorLargeTrie(t *testing.T) { } } -func TestIteratorNodeBlob(t *testing.T) { +func testIteratorNodeBlob(t *testing.T, scheme string) { var ( db = rawdb.NewMemoryDatabase() - triedb = NewDatabase(db) + triedb = newTestDatabase(db, scheme) trie = NewEmpty(triedb) ) vals := []struct{ k, v string }{ @@ -582,11 +636,12 @@ func TestIteratorNodeBlob(t *testing.T) { all[val.k] = val.v trie.MustUpdate([]byte(val.k), []byte(val.v)) } - _, nodes := trie.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) - triedb.Cap(0) + root, nodes := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) + triedb.Commit(root, false) - found := make(map[common.Hash][]byte) + var found = make(map[common.Hash][]byte) + trie, _ = New(TrieID(root), triedb) it := trie.NodeIterator(nil) for it.Next(true) { if it.Hash() == (common.Hash{}) { @@ -600,9 +655,13 @@ func TestIteratorNodeBlob(t *testing.T) { var count int for dbIter.Next() { - got, present := found[common.BytesToHash(dbIter.Key())] + ok, _, _ := isTrieNode(triedb.Scheme(), dbIter.Key(), dbIter.Value()) + if !ok { + continue + } + got, present := found[crypto.Keccak256Hash(dbIter.Value())] if !present { - t.Fatalf("Miss trie node %v", dbIter.Key()) + t.Fatal("Miss trie node") } if !bytes.Equal(got, dbIter.Value()) { t.Fatalf("Unexpected trie node want %v got %v", dbIter.Value(), got) @@ -613,3 +672,29 @@ func TestIteratorNodeBlob(t *testing.T) { t.Fatal("Find extra trie node via iterator") } } + +// isTrieNode is a helper function which reports if the provided +// database entry belongs to a trie node or not. Note in tests +// only single layer trie is used, namely storage trie is not +// considered at all. +func isTrieNode(scheme string, key, val []byte) (bool, []byte, common.Hash) { + var ( + path []byte + hash common.Hash + ) + if scheme == rawdb.HashScheme { + ok := rawdb.IsLegacyTrieNode(key, val) + if !ok { + return false, nil, common.Hash{} + } + hash = common.BytesToHash(key) + } else { + ok, remain := rawdb.IsAccountTrieNode(key) + if !ok { + return false, nil, common.Hash{} + } + path = common.CopyBytes(remain) + hash = crypto.Keccak256Hash(val) + } + return true, path, hash +} diff --git a/trie/nodeset.go b/trie/nodeset.go deleted file mode 100644 index 4ebbb3329..000000000 --- a/trie/nodeset.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2022 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package trie - -import ( - "fmt" - "sort" - "strings" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/trie/trienode" -) - -// NodeSet contains all dirty nodes collected during the commit operation. -// Each node is keyed by path. It's not thread-safe to use. -type NodeSet struct { - owner common.Hash // the identifier of the trie - leaves []*leaf // the list of dirty leaves - updates int // the count of updated and inserted nodes - deletes int // the count of deleted nodes - - // The set of all dirty nodes. Dirty nodes include newly inserted nodes, - // deleted nodes and updated nodes. The original value of the newly - // inserted node must be nil, and the original value of the other two - // types must be non-nil. - nodes map[string]*trienode.WithPrev -} - -// NewNodeSet initializes an empty node set to be used for tracking dirty nodes -// from a specific account or storage trie. The owner is zero for the account -// trie and the owning account address hash for storage tries. -func NewNodeSet(owner common.Hash) *NodeSet { - return &NodeSet{ - owner: owner, - nodes: make(map[string]*trienode.WithPrev), - } -} - -// forEachWithOrder iterates the dirty nodes with the order from bottom to top, -// right to left, nodes with the longest path will be iterated first. -func (set *NodeSet) forEachWithOrder(callback func(path string, n *trienode.Node)) { - var paths sort.StringSlice - for path := range set.nodes { - paths = append(paths, path) - } - // Bottom-up, longest path first - sort.Sort(sort.Reverse(paths)) - for _, path := range paths { - callback(path, set.nodes[path].Unwrap()) - } -} - -// addNode adds the provided dirty node into set. -func (set *NodeSet) addNode(path []byte, n *trienode.WithPrev) { - if n.IsDeleted() { - set.deletes += 1 - } else { - set.updates += 1 - } - set.nodes[string(path)] = n -} - -// addLeaf adds the provided leaf node into set. -func (set *NodeSet) addLeaf(node *leaf) { - set.leaves = append(set.leaves, node) -} - -// Size returns the number of dirty nodes in set. -func (set *NodeSet) Size() (int, int) { - return set.updates, set.deletes -} - -// Hashes returns the hashes of all updated nodes. TODO(rjl493456442) how can -// we get rid of it? -func (set *NodeSet) Hashes() []common.Hash { - var ret []common.Hash - for _, node := range set.nodes { - ret = append(ret, node.Hash) - } - return ret -} - -// Summary returns a string-representation of the NodeSet. -func (set *NodeSet) Summary() string { - var out = new(strings.Builder) - fmt.Fprintf(out, "nodeset owner: %v\n", set.owner) - if set.nodes != nil { - for path, n := range set.nodes { - // Deletion - if n.IsDeleted() { - fmt.Fprintf(out, " [-]: %x prev: %x\n", path, n.Prev) - continue - } - // Insertion - if len(n.Prev) == 0 { - fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.Hash) - continue - } - // Update - fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.Hash, n.Prev) - } - } - for _, n := range set.leaves { - fmt.Fprintf(out, "[leaf]: %v\n", n) - } - return out.String() -} - -// MergedNodeSet represents a merged dirty node set for a group of tries. -type MergedNodeSet struct { - sets map[common.Hash]*NodeSet -} - -// NewMergedNodeSet initializes an empty merged set. -func NewMergedNodeSet() *MergedNodeSet { - return &MergedNodeSet{sets: make(map[common.Hash]*NodeSet)} -} - -// NewWithNodeSet constructs a merged nodeset with the provided single set. -func NewWithNodeSet(set *NodeSet) *MergedNodeSet { - merged := NewMergedNodeSet() - merged.Merge(set) - return merged -} - -// Merge merges the provided dirty nodes of a trie into the set. The assumption -// is held that no duplicated set belonging to the same trie will be merged twice. -func (set *MergedNodeSet) Merge(other *NodeSet) error { - _, present := set.sets[other.owner] - if present { - return fmt.Errorf("duplicate trie for owner %#x", other.owner) - } - set.sets[other.owner] = other - return nil -} diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 5bfd24650..b3cdeadb6 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -20,6 +20,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" ) // SecureTrie is the old name of StateTrie. @@ -212,7 +213,7 @@ func (t *StateTrie) GetKey(shaKey []byte) []byte { // All cached preimages will be also flushed if preimages recording is enabled. // Once the trie is committed, it's not usable anymore. A new trie must // be created with new root and updated trie database for following usage -func (t *StateTrie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { +func (t *StateTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) { // Write all the pre-images to the actual disk database if len(t.getSecKeyCache()) > 0 { if t.preimages != nil { diff --git a/trie/secure_trie_test.go b/trie/secure_trie_test.go index a55c10a60..2da7e6ccb 100644 --- a/trie/secure_trie_test.go +++ b/trie/secure_trie_test.go @@ -25,7 +25,9 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/trie/trienode" ) func newEmptySecure() *StateTrie { @@ -59,7 +61,7 @@ func makeTestStateTrie() (*Database, *StateTrie, map[string][]byte) { } } root, nodes := trie.Commit(false) - if err := triedb.Update(NewWithNodeSet(nodes)); err != nil { + if err := triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)); err != nil { panic(fmt.Errorf("failed to commit db %v", err)) } // Re-create the trie based on the new state diff --git a/trie/sync_test.go b/trie/sync_test.go index 70898604f..9114501ca 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -25,13 +25,16 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/trie/trienode" ) // makeTestTrie create a sample test trie to test node-wise reconstruction. -func makeTestTrie() (*Database, *StateTrie, map[string][]byte) { +func makeTestTrie(scheme string) (ethdb.Database, *Database, *StateTrie, map[string][]byte) { // Create an empty trie - triedb := NewDatabase(rawdb.NewMemoryDatabase()) + db := rawdb.NewMemoryDatabase() + triedb := newTestDatabase(db, scheme) trie, _ := NewStateTrie(TrieID(common.Hash{}), triedb) // Fill it with some arbitrary data @@ -54,23 +57,27 @@ func makeTestTrie() (*Database, *StateTrie, map[string][]byte) { } } root, nodes := trie.Commit(false) - if err := triedb.Update(NewWithNodeSet(nodes)); err != nil { + if err := triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)); err != nil { panic(fmt.Errorf("failed to commit db %v", err)) } + if err := triedb.Commit(root, false); err != nil { + panic(err) + } // Re-create the trie based on the new state trie, _ = NewStateTrie(TrieID(root), triedb) - return triedb, trie, content + return db, triedb, trie, content } // checkTrieContents cross references a reconstructed trie with an expected data // content map. -func checkTrieContents(t *testing.T, db *Database, root []byte, content map[string][]byte) { +func checkTrieContents(t *testing.T, db ethdb.Database, scheme string, root []byte, content map[string][]byte) { // Check root availability and trie contents - trie, err := NewStateTrie(TrieID(common.BytesToHash(root)), db) + ndb := newTestDatabase(db, scheme) + trie, err := NewStateTrie(TrieID(common.BytesToHash(root)), ndb) if err != nil { t.Fatalf("failed to create trie at %x: %v", root, err) } - if err := checkTrieConsistency(db, common.BytesToHash(root)); err != nil { + if err := checkTrieConsistency(db, scheme, common.BytesToHash(root)); err != nil { t.Fatalf("inconsistent trie at %x: %v", root, err) } for key, val := range content { @@ -81,9 +88,9 @@ func checkTrieContents(t *testing.T, db *Database, root []byte, content map[stri } // checkTrieConsistency checks that all nodes in a trie are indeed present. -func checkTrieConsistency(db *Database, root common.Hash) error { - // Create and iterate a trie rooted in a subnode - trie, err := NewStateTrie(TrieID(root), db) +func checkTrieConsistency(db ethdb.Database, scheme string, root common.Hash) error { + ndb := newTestDatabase(db, scheme) + trie, err := NewStateTrie(TrieID(root), ndb) if err != nil { return nil // Consider a non existent state consistent } @@ -104,11 +111,16 @@ type trieElement struct { func TestEmptySync(t *testing.T) { dbA := NewDatabase(rawdb.NewMemoryDatabase()) dbB := NewDatabase(rawdb.NewMemoryDatabase()) - emptyA, _ := New(TrieID(common.Hash{}), dbA) - emptyB, _ := New(TrieID(types.EmptyRootHash), dbB) + //dbC := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.PathScheme) + //dbD := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.PathScheme) - for i, trie := range []*Trie{emptyA, emptyB} { - sync := NewSync(trie.Hash(), memorydb.New(), nil, []*Database{dbA, dbB}[i].Scheme()) + emptyA := NewEmpty(dbA) + emptyB, _ := New(TrieID(types.EmptyRootHash), dbB) + //emptyC := NewEmpty(dbC) + //emptyD, _ := New(TrieID(types.EmptyRootHash), dbD) + + for i, trie := range []*Trie{emptyA, emptyB /*emptyC, emptyD*/} { + sync := NewSync(trie.Hash(), memorydb.New(), nil, []*Database{dbA, dbB /*dbC, dbD*/}[i].Scheme()) if paths, nodes, codes := sync.Missing(1); len(paths) != 0 || len(nodes) != 0 || len(codes) != 0 { t.Errorf("test %d: content requested for empty trie: %v, %v, %v", i, paths, nodes, codes) } @@ -117,18 +129,23 @@ func TestEmptySync(t *testing.T) { // Tests that given a root hash, a trie can sync iteratively on a single thread, // requesting retrieval tasks and returning all of them in one go. -func TestIterativeSyncIndividual(t *testing.T) { testIterativeSync(t, 1, false) } -func TestIterativeSyncBatched(t *testing.T) { testIterativeSync(t, 100, false) } -func TestIterativeSyncIndividualByPath(t *testing.T) { testIterativeSync(t, 1, true) } -func TestIterativeSyncBatchedByPath(t *testing.T) { testIterativeSync(t, 100, true) } +func TestIterativeSync(t *testing.T) { + testIterativeSync(t, 1, false, rawdb.HashScheme) + testIterativeSync(t, 100, false, rawdb.HashScheme) + testIterativeSync(t, 1, true, rawdb.HashScheme) + testIterativeSync(t, 100, true, rawdb.HashScheme) + // testIterativeSync(t, 1, false, rawdb.PathScheme) + // testIterativeSync(t, 100, false, rawdb.PathScheme) + // testIterativeSync(t, 1, true, rawdb.PathScheme) + // testIterativeSync(t, 100, true, rawdb.PathScheme) +} -func testIterativeSync(t *testing.T, count int, bypath bool) { +func testIterativeSync(t *testing.T, count int, bypath bool, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code @@ -146,7 +163,8 @@ func testIterativeSync(t *testing.T, count int, bypath bool) { results := make([]NodeSyncResult, len(elements)) if !bypath { for i, element := range elements { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for hash %x: %v", element.hash, err) } @@ -183,18 +201,22 @@ func testIterativeSync(t *testing.T, count int, bypath bool) { } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) } // Tests that the trie scheduler can correctly reconstruct the state even if only // partial results are returned, and the others sent only later. func TestIterativeDelayedSync(t *testing.T) { + testIterativeDelayedSync(t, rawdb.HashScheme) + //testIterativeDelayedSync(t, rawdb.PathScheme) +} + +func testIterativeDelayedSync(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code @@ -212,7 +234,8 @@ func TestIterativeDelayedSync(t *testing.T) { // Sync only half of the scheduled nodes results := make([]NodeSyncResult, len(elements)/2+1) for i, element := range elements[:len(results)] { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } @@ -240,22 +263,25 @@ func TestIterativeDelayedSync(t *testing.T) { } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) } // Tests that given a root hash, a trie can sync iteratively on a single thread, // requesting retrieval tasks and returning all of them in one go, however in a // random order. -func TestIterativeRandomSyncIndividual(t *testing.T) { testIterativeRandomSync(t, 1) } -func TestIterativeRandomSyncBatched(t *testing.T) { testIterativeRandomSync(t, 100) } +func TestIterativeRandomSyncIndividual(t *testing.T) { + testIterativeRandomSync(t, 1, rawdb.HashScheme) + testIterativeRandomSync(t, 100, rawdb.HashScheme) + // testIterativeRandomSync(t, 1, rawdb.PathScheme) + // testIterativeRandomSync(t, 100, rawdb.PathScheme) +} -func testIterativeRandomSync(t *testing.T, count int) { +func testIterativeRandomSync(t *testing.T, count int, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code @@ -273,7 +299,8 @@ func testIterativeRandomSync(t *testing.T, count int) { // Fetch all the queued nodes in a random order results := make([]NodeSyncResult, 0, len(queue)) for path, element := range queue { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } @@ -302,18 +329,22 @@ func testIterativeRandomSync(t *testing.T, count int) { } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) } // Tests that the trie scheduler can correctly reconstruct the state even if only // partial results are returned (Even those randomly), others sent only later. func TestIterativeRandomDelayedSync(t *testing.T) { + testIterativeRandomDelayedSync(t, rawdb.HashScheme) + // testIterativeRandomDelayedSync(t, rawdb.PathScheme) +} + +func testIterativeRandomDelayedSync(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code @@ -331,7 +362,8 @@ func TestIterativeRandomDelayedSync(t *testing.T) { // Sync only half of the scheduled nodes, even those in random order results := make([]NodeSyncResult, 0, len(queue)/2+1) for path, element := range queue { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } @@ -365,18 +397,22 @@ func TestIterativeRandomDelayedSync(t *testing.T) { } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) } // Tests that a trie sync will not request nodes multiple times, even if they // have such references. func TestDuplicateAvoidanceSync(t *testing.T) { + testDuplicateAvoidanceSync(t, rawdb.HashScheme) + // testDuplicateAvoidanceSync(t, rawdb.PathScheme) +} + +func testDuplicateAvoidanceSync(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code @@ -395,7 +431,8 @@ func TestDuplicateAvoidanceSync(t *testing.T) { for len(elements) > 0 { results := make([]NodeSyncResult, len(elements)) for i, element := range elements { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } @@ -428,27 +465,33 @@ func TestDuplicateAvoidanceSync(t *testing.T) { } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) } // Tests that at any point in time during a sync, only complete sub-tries are in // the database. -func TestIncompleteSync(t *testing.T) { +func TestIncompleteSyncHash(t *testing.T) { + testIncompleteSync(t, rawdb.HashScheme) + // testIncompleteSync(t, rawdb.PathScheme) +} + +func testIncompleteSync(t *testing.T, scheme string) { t.Parallel() + // Create a random trie to copy - srcDb, srcTrie, _ := makeTestTrie() + _, srcDb, srcTrie, _ := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code // at the testing trie. var ( - added []common.Hash - elements []trieElement - root = srcTrie.Hash() + addedKeys []string + addedHashes []common.Hash + elements []trieElement + root = srcTrie.Hash() ) paths, nodes, _ := sched.Missing(1) for i := 0; i < len(paths); i++ { @@ -462,7 +505,8 @@ func TestIncompleteSync(t *testing.T) { // Fetch a batch of trie nodes results := make([]NodeSyncResult, len(elements)) for i, element := range elements { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } @@ -483,11 +527,8 @@ func TestIncompleteSync(t *testing.T) { for _, result := range results { hash := crypto.Keccak256Hash(result.Data) if hash != root { - added = append(added, hash) - } - // Check that all known sub-tries in the synced trie are complete - if err := checkTrieConsistency(triedb, hash); err != nil { - t.Fatalf("trie inconsistent: %v", err) + addedKeys = append(addedKeys, result.Path) + addedHashes = append(addedHashes, crypto.Keccak256Hash(result.Data)) } } // Fetch the next batch to retrieve @@ -502,25 +543,31 @@ func TestIncompleteSync(t *testing.T) { } } // Sanity check that removing any node from the database is detected - for _, hash := range added { - value, _ := diskdb.Get(hash.Bytes()) - diskdb.Delete(hash.Bytes()) - if err := checkTrieConsistency(triedb, root); err == nil { - t.Fatalf("trie inconsistency not caught, missing: %x", hash) + for i, path := range addedKeys { + owner, inner := ResolvePath([]byte(path)) + nodeHash := addedHashes[i] + value := rawdb.ReadTrieNode(diskdb, owner, inner, nodeHash, scheme) + rawdb.DeleteTrieNode(diskdb, owner, inner, nodeHash, scheme) + if err := checkTrieConsistency(diskdb, srcDb.Scheme(), root); err == nil { + t.Fatalf("trie inconsistency not caught, missing: %x", path) } - diskdb.Put(hash.Bytes(), value) + rawdb.WriteTrieNode(diskdb, owner, inner, nodeHash, value, scheme) } } // Tests that trie nodes get scheduled lexicographically when having the same // depth. func TestSyncOrdering(t *testing.T) { + testSyncOrdering(t, rawdb.HashScheme) + // testSyncOrdering(t, rawdb.PathScheme) +} + +func testSyncOrdering(t *testing.T, scheme string) { // Create a random trie to copy - srcDb, srcTrie, srcData := makeTestTrie() + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) // Create a destination trie and sync with the scheduler, tracking the requests diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) sched := NewSync(srcTrie.Hash(), diskdb, nil, srcDb.Scheme()) // The code requests are ignored here since there is no code @@ -542,7 +589,8 @@ func TestSyncOrdering(t *testing.T) { for len(elements) > 0 { results := make([]NodeSyncResult, len(elements)) for i, element := range elements { - data, err := srcDb.Node(element.hash) + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(srcTrie.Hash()).Node(owner, inner, element.hash) if err != nil { t.Fatalf("failed to retrieve node data for %x: %v", element.hash, err) } @@ -571,7 +619,7 @@ func TestSyncOrdering(t *testing.T) { } } // Cross check that the two tries are in sync - checkTrieContents(t, triedb, srcTrie.Hash().Bytes(), srcData) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) // Check that the trie nodes have been requested path-ordered for i := 0; i < len(reqs)-1; i++ { @@ -585,3 +633,116 @@ func TestSyncOrdering(t *testing.T) { } } } + +func syncWith(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database) { + // Create a destination trie and sync with the scheduler + sched := NewSync(root, db, nil, srcDb.Scheme()) + + // The code requests are ignored here since there is no code + // at the testing trie. + paths, nodes, _ := sched.Missing(1) + var elements []trieElement + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } + for len(elements) > 0 { + results := make([]NodeSyncResult, len(elements)) + for i, element := range elements { + owner, inner := ResolvePath([]byte(element.path)) + data, err := srcDb.Reader(root).Node(owner, inner, element.hash) + if err != nil { + t.Fatalf("failed to retrieve node data for hash %x: %v", element.hash, err) + } + results[i] = NodeSyncResult{element.path, data} + } + for index, result := range results { + if err := sched.ProcessNode(result); err != nil { + t.Fatalf("failed to process result[%d][%v] data %v %v", index, []byte(result.Path), result.Data, err) + } + } + batch := db.NewBatch() + if err := sched.Commit(batch); err != nil { + t.Fatalf("failed to commit data: %v", err) + } + batch.Write() + + paths, nodes, _ = sched.Missing(1) + elements = elements[:0] + for i := 0; i < len(paths); i++ { + elements = append(elements, trieElement{ + path: paths[i], + hash: nodes[i], + syncPath: NewSyncPath([]byte(paths[i])), + }) + } + } +} + +// Tests that the syncing target is keeping moving which may overwrite the stale +// states synced in the last cycle. +func TestSyncMovingTarget(t *testing.T) { + testSyncMovingTarget(t, rawdb.HashScheme) + // testSyncMovingTarget(t, rawdb.PathScheme) +} + +func testSyncMovingTarget(t *testing.T, scheme string) { + // Create a random trie to copy + _, srcDb, srcTrie, srcData := makeTestTrie(scheme) + + // Create a destination trie and sync with the scheduler + diskdb := rawdb.NewMemoryDatabase() + syncWith(t, srcTrie.Hash(), diskdb, srcDb) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), srcData) + + // Push more modifications into the src trie, to see if dest trie can still + // sync with it(overwrite stale states) + var ( + preRoot = srcTrie.Hash() + diff = make(map[string][]byte) + ) + for i := byte(0); i < 10; i++ { + key, val := randBytes(32), randBytes(32) + srcTrie.MustUpdate(key, val) + diff[string(key)] = val + } + root, nodes := srcTrie.Commit(false) + if err := srcDb.Update(root, preRoot, trienode.NewWithNodeSet(nodes)); err != nil { + panic(err) + } + if err := srcDb.Commit(root, false); err != nil { + panic(err) + } + preRoot = root + srcTrie, _ = NewStateTrie(TrieID(root), srcDb) + + syncWith(t, srcTrie.Hash(), diskdb, srcDb) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), diff) + + // Revert added modifications from the src trie, to see if dest trie can still + // sync with it(overwrite reverted states) + var reverted = make(map[string][]byte) + for k := range diff { + srcTrie.MustDelete([]byte(k)) + reverted[k] = nil + } + for k := range srcData { + val := randBytes(32) + srcTrie.MustUpdate([]byte(k), val) + reverted[k] = val + } + root, nodes = srcTrie.Commit(false) + if err := srcDb.Update(root, preRoot, trienode.NewWithNodeSet(nodes)); err != nil { + panic(err) + } + if err := srcDb.Commit(root, false); err != nil { + panic(err) + } + srcTrie, _ = NewStateTrie(TrieID(root), srcDb) + + syncWith(t, srcTrie.Hash(), diskdb, srcDb) + checkTrieContents(t, diskdb, srcDb.Scheme(), srcTrie.Hash().Bytes(), reverted) +} diff --git a/trie/tracer.go b/trie/tracer.go index 04b2f12bf..2b5de8ec4 100644 --- a/trie/tracer.go +++ b/trie/tracer.go @@ -115,7 +115,7 @@ func (t *tracer) copy() *tracer { } // markDeletions puts all tracked deletions into the provided nodeset. -func (t *tracer) markDeletions(set *NodeSet) { +func (t *tracer) markDeletions(set *trienode.NodeSet) { for path := range t.deletes { // It's possible a few deleted nodes were embedded // in their parent before, the deletions can be no @@ -124,6 +124,6 @@ func (t *tracer) markDeletions(set *NodeSet) { if !ok { continue } - set.addNode([]byte(path), trienode.NewWithPrev(common.Hash{}, nil, prev)) + set.AddNode([]byte(path), trienode.NewWithPrev(common.Hash{}, nil, prev)) } } diff --git a/trie/tracer_test.go b/trie/tracer_test.go index 1b9f44108..5f61a1d6b 100644 --- a/trie/tracer_test.go +++ b/trie/tracer_test.go @@ -22,6 +22,8 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -69,7 +71,7 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) { insertSet := copySet(trie.tracer.inserts) // copy before commit deleteSet := copySet(trie.tracer.deletes) // copy before commit root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) seen := setKeys(iterNodes(db, root)) if !compareSet(insertSet, seen) { @@ -135,7 +137,7 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) { trie.MustUpdate([]byte(val.k), []byte(val.v)) } root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) if err := verifyAccessList(orig, trie, nodes); err != nil { @@ -143,13 +145,14 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) { } // Update trie + parent := root trie, _ = New(TrieID(root), db) orig = trie.Copy() for _, val := range vals { trie.MustUpdate([]byte(val.k), randBytes(32)) } root, nodes = trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) if err := verifyAccessList(orig, trie, nodes); err != nil { @@ -157,6 +160,7 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) { } // Add more new nodes + parent = root trie, _ = New(TrieID(root), db) orig = trie.Copy() var keys []string @@ -166,7 +170,7 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) { trie.MustUpdate(key, randBytes(32)) } root, nodes = trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) if err := verifyAccessList(orig, trie, nodes); err != nil { @@ -174,13 +178,14 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) { } // Partial deletions + parent = root trie, _ = New(TrieID(root), db) orig = trie.Copy() for _, key := range keys { trie.MustUpdate([]byte(key), nil) } root, nodes = trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) if err := verifyAccessList(orig, trie, nodes); err != nil { @@ -188,13 +193,14 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) { } // Delete all + parent = root trie, _ = New(TrieID(root), db) orig = trie.Copy() for _, val := range vals { trie.MustUpdate([]byte(val.k), nil) } root, nodes = trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) if err := verifyAccessList(orig, trie, nodes); err != nil { @@ -213,7 +219,7 @@ func TestAccessListLeak(t *testing.T) { trie.MustUpdate([]byte(val.k), []byte(val.v)) } root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) var cases = []struct { op func(tr *Trie) @@ -263,15 +269,16 @@ func TestTinyTree(t *testing.T) { trie.MustUpdate([]byte(val.k), randBytes(32)) } root, set := trie.Commit(false) - db.Update(NewWithNodeSet(set)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(set)) + parent := root trie, _ = New(TrieID(root), db) orig := trie.Copy() for _, val := range tiny { trie.MustUpdate([]byte(val.k), []byte(val.v)) } root, set = trie.Commit(false) - db.Update(NewWithNodeSet(set)) + db.Update(root, parent, trienode.NewWithNodeSet(set)) trie, _ = New(TrieID(root), db) if err := verifyAccessList(orig, trie, set); err != nil { diff --git a/trie/trie.go b/trie/trie.go index 14685c3df..e63b2d283 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" ) // Trie is a Merkle Patricia Trie. Use New to create a trie that sits on @@ -95,7 +96,7 @@ func New(id *ID, db NodeReader) (*Trie, error) { // NewEmpty is a shortcut to create empty tree. It's mostly used in tests. func NewEmpty(db *Database) *Trie { - tr, _ := New(TrieID(common.Hash{}), db) + tr, _ := New(TrieID(types.EmptyRootHash), db) return tr } @@ -571,10 +572,10 @@ func (t *Trie) Hash() common.Hash { // The returned nodeset can be nil if the trie is clean (nothing to commit). // Once the trie is committed, it's not usable anymore. A new trie must // be created with new root and updated trie database for following usage -func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { +func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) { defer t.tracer.reset() - nodes := NewNodeSet(t.owner) + nodes := trienode.NewNodeSet(t.owner) t.tracer.markDeletions(nodes) // Trie is empty and can be classified into two types of situations: diff --git a/trie/trie_reader.go b/trie/trie_reader.go index e3f98a806..51855320a 100644 --- a/trie/trie_reader.go +++ b/trie/trie_reader.go @@ -32,9 +32,9 @@ type Reader interface { // NodeReader wraps all the necessary functions for accessing trie node. type NodeReader interface { - // GetReader returns a reader for accessing all trie nodes with provided + // Reader returns a reader for accessing all trie nodes with provided // state root. Nil is returned in case the state is not available. - GetReader(root common.Hash) Reader + Reader(root common.Hash) Reader } // trieReader is a wrapper of the underlying node reader. It's not safe @@ -47,7 +47,7 @@ type trieReader struct { // newTrieReader initializes the trie reader with the given node reader. func newTrieReader(stateRoot, owner common.Hash, db NodeReader) (*trieReader, error) { - reader := db.GetReader(stateRoot) + reader := db.Reader(stateRoot) if reader == nil { return nil, fmt.Errorf("state not found #%x", stateRoot) } diff --git a/trie/trie_test.go b/trie/trie_test.go index a03a68283..70de46022 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" "golang.org/x/crypto/sha3" ) @@ -73,18 +74,23 @@ func TestMissingRoot(t *testing.T) { } } -func TestMissingNodeDisk(t *testing.T) { testMissingNode(t, false) } -func TestMissingNodeMemonly(t *testing.T) { testMissingNode(t, true) } +func TestMissingNode(t *testing.T) { + testMissingNode(t, false, rawdb.HashScheme) + //testMissingNode(t, false, rawdb.PathScheme) + testMissingNode(t, true, rawdb.HashScheme) + //testMissingNode(t, true, rawdb.PathScheme) +} -func testMissingNode(t *testing.T, memonly bool) { +func testMissingNode(t *testing.T, memonly bool, scheme string) { diskdb := rawdb.NewMemoryDatabase() - triedb := NewDatabase(diskdb) + triedb := newTestDatabase(diskdb, scheme) trie := NewEmpty(triedb) updateString(trie, "120000", "qwerqwerqwerqwerqwerqwerqwerqwer") updateString(trie, "123456", "asdfasdfasdfasdfasdfasdfasdfasdf") root, nodes := trie.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) + triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) + if !memonly { triedb.Commit(root, false) } @@ -115,34 +121,39 @@ func testMissingNode(t *testing.T, memonly bool) { t.Errorf("Unexpected error: %v", err) } - hash := common.HexToHash("0xe1d943cc8f061a0c0b98162830b970395ac9315654824bf21b73b891365262f9") + var ( + path []byte + hash = common.HexToHash("0xe1d943cc8f061a0c0b98162830b970395ac9315654824bf21b73b891365262f9") + ) + for p, n := range nodes.Nodes { + if n.Hash == hash { + path = common.CopyBytes([]byte(p)) + break + } + } + trie, _ = New(TrieID(root), triedb) if memonly { - delete(triedb.dirties, hash) + trie.reader.banned = map[string]struct{}{string(path): {}} } else { - diskdb.Delete(hash[:]) + rawdb.DeleteTrieNode(diskdb, common.Hash{}, path, hash, scheme) } - trie, _ = New(TrieID(root), triedb) _, err = trie.Get([]byte("120000")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) } - trie, _ = New(TrieID(root), triedb) _, err = trie.Get([]byte("120099")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) } - trie, _ = New(TrieID(root), triedb) _, err = trie.Get([]byte("123456")) if err != nil { t.Errorf("Unexpected error: %v", err) } - trie, _ = New(TrieID(root), triedb) err = trie.Update([]byte("120099"), []byte("zxcv")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) } - trie, _ = New(TrieID(root), triedb) err = trie.Delete([]byte("123456")) if _, ok := err.(*MissingNodeError); !ok { t.Errorf("Wrong error: %v", err) @@ -192,7 +203,7 @@ func TestGet(t *testing.T) { return } root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) trie, _ = New(TrieID(root), db) } } @@ -249,8 +260,8 @@ func TestEmptyValues(t *testing.T) { } func TestReplication(t *testing.T) { - triedb := NewDatabase(rawdb.NewMemoryDatabase()) - trie := NewEmpty(triedb) + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, @@ -263,13 +274,13 @@ func TestReplication(t *testing.T) { for _, val := range vals { updateString(trie, val.k, val.v) } - exp, nodes := trie.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) + root, nodes := trie.Commit(false) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) // create a new trie on top of the database and check that lookups work. - trie2, err := New(TrieID(exp), triedb) + trie2, err := New(TrieID(root), db) if err != nil { - t.Fatalf("can't recreate trie at %x: %v", exp, err) + t.Fatalf("can't recreate trie at %x: %v", root, err) } for _, kv := range vals { if string(getString(trie2, kv.k)) != kv.v { @@ -277,17 +288,17 @@ func TestReplication(t *testing.T) { } } hash, nodes := trie2.Commit(false) - if hash != exp { - t.Errorf("root failure. expected %x got %x", exp, hash) + if hash != root { + t.Errorf("root failure. expected %x got %x", root, hash) } // recreate the trie after commit if nodes != nil { - triedb.Update(NewWithNodeSet(nodes)) + db.Update(hash, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) } - trie2, err = New(TrieID(hash), triedb) + trie2, err = New(TrieID(hash), db) if err != nil { - t.Fatalf("can't recreate trie at %x: %v", exp, err) + t.Fatalf("can't recreate trie at %x: %v", hash, err) } // perform some insertions on the new trie. vals2 := []struct{ k, v string }{ @@ -304,8 +315,8 @@ func TestReplication(t *testing.T) { for _, val := range vals2 { updateString(trie2, val.k, val.v) } - if hash := trie2.Hash(); hash != exp { - t.Errorf("root failure. expected %x got %x", exp, hash) + if trie2.Hash() != hash { + t.Errorf("root failure. expected %x got %x", hash, hash) } } @@ -402,12 +413,12 @@ func (randTest) Generate(r *rand.Rand, size int) reflect.Value { return reflect.ValueOf(steps) } -func verifyAccessList(old *Trie, new *Trie, set *NodeSet) error { +func verifyAccessList(old *Trie, new *Trie, set *trienode.NodeSet) error { deletes, inserts, updates := diffTries(old, new) // Check insertion set for path := range inserts { - n, ok := set.nodes[path] + n, ok := set.Nodes[path] if !ok || n.IsDeleted() { return errors.New("expect new node") } @@ -417,7 +428,7 @@ func verifyAccessList(old *Trie, new *Trie, set *NodeSet) error { } // Check deletion set for path, blob := range deletes { - n, ok := set.nodes[path] + n, ok := set.Nodes[path] if !ok || !n.IsDeleted() { return errors.New("expect deleted node") } @@ -430,7 +441,7 @@ func verifyAccessList(old *Trie, new *Trie, set *NodeSet) error { } // Check update set for path, blob := range updates { - n, ok := set.nodes[path] + n, ok := set.Nodes[path] if !ok || n.IsDeleted() { return errors.New("expect updated node") } @@ -445,8 +456,13 @@ func verifyAccessList(old *Trie, new *Trie, set *NodeSet) error { } func runRandTest(rt randTest) bool { + var scheme = rawdb.HashScheme + //if rand.Intn(2) == 0 { + // scheme = rawdb.PathScheme + //} var ( - triedb = NewDatabase(rawdb.NewMemoryDatabase()) + origin = types.EmptyRootHash + triedb = newTestDatabase(rawdb.NewMemoryDatabase(), scheme) tr = NewEmpty(triedb) values = make(map[string]string) // tracks content of the trie origTrie = NewEmpty(triedb) @@ -487,7 +503,7 @@ func runRandTest(rt randTest) bool { case opCommit: root, nodes := tr.Commit(true) if nodes != nil { - triedb.Update(NewWithNodeSet(nodes)) + triedb.Update(root, origin, trienode.NewWithNodeSet(nodes)) } newtr, err := New(TrieID(root), triedb) if err != nil { @@ -502,6 +518,7 @@ func runRandTest(rt randTest) bool { } tr = newtr origTrie = tr.Copy() + origin = root case opItercheckhash: checktr := NewEmpty(triedb) it := NewIterator(tr.NodeIterator(nil)) @@ -821,7 +838,7 @@ func TestCommitSequence(t *testing.T) { } // Flush trie -> database root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) // Flush memdb -> disk (sponge) db.Commit(root, false) if got, exp := s.sponge.Sum(nil), tc.expWriteSeqHash; !bytes.Equal(got, exp) { @@ -862,7 +879,7 @@ func TestCommitSequenceRandomBlobs(t *testing.T) { } // Flush trie -> database root, nodes := trie.Commit(false) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) // Flush memdb -> disk (sponge) db.Commit(root, false) if got, exp := s.sponge.Sum(nil), tc.expWriteSeqHash; !bytes.Equal(got, exp) { @@ -902,7 +919,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { // Flush trie -> database root, nodes := trie.Commit(false) // Flush memdb -> disk (sponge) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) db.Commit(root, false) // And flush stacktrie -> disk stRoot, err := stTrie.Commit() @@ -950,7 +967,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) { // Flush trie -> database root, nodes := trie.Commit(false) // Flush memdb -> disk (sponge) - db.Update(NewWithNodeSet(nodes)) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) db.Commit(root, false) // And flush stacktrie -> disk stRoot, err := stTrie.Commit() @@ -1121,8 +1138,8 @@ func benchmarkDerefRootFixedSize(b *testing.B, addresses [][20]byte, accounts [] trie.MustUpdate(crypto.Keccak256(addresses[i][:]), accounts[i]) } h := trie.Hash() - _, nodes := trie.Commit(false) - triedb.Update(NewWithNodeSet(nodes)) + root, nodes := trie.Commit(false) + triedb.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) b.StartTimer() triedb.Dereference(h) b.StopTimer() diff --git a/trie/database.go b/trie/triedb/hashdb/database.go similarity index 81% rename from trie/database.go rename to trie/triedb/hashdb/database.go index c105c730a..eaccfe944 100644 --- a/trie/database.go +++ b/trie/triedb/hashdb/database.go @@ -14,12 +14,11 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . -package trie +package hashdb import ( "errors" "reflect" - "runtime" "sync" "time" @@ -58,10 +57,10 @@ var ( memcacheCommitSizeMeter = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil) ) -// childResolver defines the required method to decode the provided +// ChildResolver defines the required method to decode the provided // trie node and iterate the children on top. -type childResolver interface { - forEach(node []byte, onChild func(common.Hash)) +type ChildResolver interface { + ForEach(node []byte, onChild func(common.Hash)) } // Database is an intermediate write layer between the trie data structures and @@ -74,7 +73,7 @@ type childResolver interface { // servers even while the trie is executing expensive garbage collection. type Database struct { diskdb ethdb.Database // Persistent storage for matured trie nodes - resolver childResolver // The handler to resolve children of nodes + resolver ChildResolver // The handler to resolve children of nodes cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes @@ -91,7 +90,6 @@ type Database struct { dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. metadata) childrenSize common.StorageSize // Storage size of the external children tracking - preimages *preimageStore // The store for caching preimages lock sync.RWMutex } @@ -114,49 +112,20 @@ var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size()) // forChildren invokes the callback for all the tracked children of this node, // both the implicit ones from inside the node as well as the explicit ones // from outside the node. -func (n *cachedNode) forChildren(resolver childResolver, onChild func(hash common.Hash)) { +func (n *cachedNode) forChildren(resolver ChildResolver, onChild func(hash common.Hash)) { for child := range n.external { onChild(child) } - resolver.forEach(n.node, onChild) + resolver.ForEach(n.node, onChild) } -// Config defines all necessary options for database. -type Config struct { - Cache int // Memory allowance (MB) to use for caching trie nodes in memory - Journal string // Journal of clean cache to survive node restarts - Preimages bool // Flag whether the preimage of trie key is recorded -} - -// NewDatabase creates a new trie database to store ephemeral trie content before -// its written out to disk or garbage collected. No read cache is created, so all -// data retrievals will hit the underlying disk database. -func NewDatabase(diskdb ethdb.Database) *Database { - return NewDatabaseWithConfig(diskdb, nil) -} - -// NewDatabaseWithConfig creates a new trie database to store ephemeral trie content -// before its written out to disk or garbage collected. It also acts as a read cache -// for nodes loaded from disk. -func NewDatabaseWithConfig(diskdb ethdb.Database, config *Config) *Database { - var cleans *fastcache.Cache - if config != nil && config.Cache > 0 { - if config.Journal == "" { - cleans = fastcache.New(config.Cache * 1024 * 1024) - } else { - cleans = fastcache.LoadFromFileOrNew(config.Journal, config.Cache*1024*1024) - } - } - var preimage *preimageStore - if config != nil && config.Preimages { - preimage = newPreimageStore(diskdb) - } +// New initializes the hash-based node database. +func New(diskdb ethdb.Database, cleans *fastcache.Cache, resolver ChildResolver) *Database { return &Database{ - diskdb: diskdb, - resolver: mptResolver{}, - cleans: cleans, - dirties: make(map[common.Hash]*cachedNode), - preimages: preimage, + diskdb: diskdb, + resolver: resolver, + cleans: cleans, + dirties: make(map[common.Hash]*cachedNode), } } @@ -369,13 +338,6 @@ func (db *Database) Cap(limit common.StorageSize) error { size := db.dirtiesSize + common.StorageSize(len(db.dirties)*cachedNodeSize) size += db.childrenSize - // If the preimage cache got large enough, push to disk. If it's still small - // leave for later to deduplicate writes. - if db.preimages != nil { - if err := db.preimages.commit(false); err != nil { - return err - } - } // Keep committing nodes from the flush-list until we're below allowance oldest := db.oldest for size > limit && oldest != (common.Hash{}) { @@ -450,12 +412,6 @@ func (db *Database) Commit(node common.Hash, report bool) error { start := time.Now() batch := db.diskdb.NewBatch() - // Move all of the accumulated preimages into a write batch - if db.preimages != nil { - if err := db.preimages.commit(true); err != nil { - return err - } - } // Move the trie itself into the batch, flushing if enough data is accumulated nodes, storage := len(db.dirties), db.dirtiesSize @@ -584,9 +540,21 @@ func (c *cleaner) Delete(key []byte) error { panic("not implemented") } -// Update inserts the dirty nodes in provided nodeset into database and -// link the account trie with multiple storage tries if necessary. -func (db *Database) Update(nodes *MergedNodeSet) error { +// Initialized returns an indicator if state data is already initialized +// in hash-based scheme by checking the presence of genesis state. +func (db *Database) Initialized(genesisRoot common.Hash) bool { + return rawdb.HasLegacyTrieNode(db.diskdb, genesisRoot) +} + +// Update inserts the dirty nodes in provided nodeset into database and link the +// account trie with multiple storage tries if necessary. +func (db *Database) Update(root common.Hash, parent common.Hash, nodes *trienode.MergedNodeSet) error { + // Ensure the parent state is present and signal a warning if not. + if parent != types.EmptyRootHash { + if blob, _ := db.Node(parent); len(blob) == 0 { + log.Error("parent state is not present") + } + } db.lock.Lock() defer db.lock.Unlock() @@ -597,18 +565,18 @@ func (db *Database) Update(nodes *MergedNodeSet) error { // Note, the storage tries must be flushed before the account trie to // retain the invariant that children go into the dirty cache first. var order []common.Hash - for owner := range nodes.sets { + for owner := range nodes.Sets { if owner == (common.Hash{}) { continue } order = append(order, owner) } - if _, ok := nodes.sets[common.Hash{}]; ok { + if _, ok := nodes.Sets[common.Hash{}]; ok { order = append(order, common.Hash{}) } for _, owner := range order { - subset := nodes.sets[owner] - subset.forEachWithOrder(func(path string, n *trienode.Node) { + subset := nodes.Sets[owner] + subset.ForEachWithOrder(func(path string, n *trienode.Node) { if n.IsDeleted() { return // ignore deletion } @@ -617,14 +585,14 @@ func (db *Database) Update(nodes *MergedNodeSet) error { } // Link up the account trie and storage trie if the node points // to an account trie leaf. - if set, present := nodes.sets[common.Hash{}]; present { - for _, n := range set.leaves { + if set, present := nodes.Sets[common.Hash{}]; present { + for _, n := range set.Leaves { var account types.StateAccount - if err := rlp.DecodeBytes(n.blob, &account); err != nil { + if err := rlp.DecodeBytes(n.Blob, &account); err != nil { return err } if account.Root != types.EmptyRootHash { - db.reference(account.Root, n.parent) + db.reference(account.Root, n.Parent) } } } @@ -633,7 +601,7 @@ func (db *Database) Update(nodes *MergedNodeSet) error { // Size returns the current storage size of the memory cache in front of the // persistent database layer. -func (db *Database) Size() (common.StorageSize, common.StorageSize) { +func (db *Database) Size() common.StorageSize { db.lock.RLock() defer db.lock.RUnlock() @@ -641,89 +609,30 @@ func (db *Database) Size() (common.StorageSize, common.StorageSize) { // the total memory consumption, the maintenance metadata is also needed to be // counted. var metadataSize = common.StorageSize(len(db.dirties) * cachedNodeSize) - var preimageSize common.StorageSize - if db.preimages != nil { - preimageSize = db.preimages.size() - } - return db.dirtiesSize + db.childrenSize + metadataSize, preimageSize + return db.dirtiesSize + db.childrenSize + metadataSize } -// GetReader retrieves a node reader belonging to the given state root. -func (db *Database) GetReader(root common.Hash) Reader { - return newHashReader(db) -} - -// hashReader is reader of hashDatabase which implements the Reader interface. -type hashReader struct { - db *Database -} - -// newHashReader initializes the hash reader. -func newHashReader(db *Database) *hashReader { - return &hashReader{db: db} -} - -// Node retrieves the RLP-encoded trie node blob with the given node hash. -// No error will be returned if the node is not found. -func (reader *hashReader) Node(_ common.Hash, _ []byte, hash common.Hash) ([]byte, error) { - blob, _ := reader.db.Node(hash) - return blob, nil -} - -// saveCache saves clean state cache to given directory path -// using specified CPU cores. -func (db *Database) saveCache(dir string, threads int) error { - if db.cleans == nil { - return nil - } - log.Info("Writing clean trie cache to disk", "path", dir, "threads", threads) - - start := time.Now() - err := db.cleans.SaveToFileConcurrent(dir, threads) - if err != nil { - log.Error("Failed to persist clean trie cache", "error", err) - return err - } - log.Info("Persisted the clean trie cache", "path", dir, "elapsed", common.PrettyDuration(time.Since(start))) - return nil -} - -// SaveCache atomically saves fast cache data to the given dir using all -// available CPU cores. -func (db *Database) SaveCache(dir string) error { - return db.saveCache(dir, runtime.GOMAXPROCS(0)) -} - -// SaveCachePeriodically atomically saves fast cache data to the given dir with -// the specified interval. All dump operation will only use a single CPU core. -func (db *Database) SaveCachePeriodically(dir string, interval time.Duration, stopCh <-chan struct{}) { - ticker := time.NewTicker(interval) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - db.saveCache(dir, 1) - case <-stopCh: - return - } - } -} - -// CommitPreimages flushes the dangling preimages to disk. It is meant to be -// called when closing the blockchain object, so that preimages are persisted -// to the database. -func (db *Database) CommitPreimages() error { - db.lock.Lock() - defer db.lock.Unlock() - - if db.preimages == nil { - return nil - } - return db.preimages.commit(true) -} +// Close closes the trie database and releases all held resources. +func (db *Database) Close() error { return nil } // Scheme returns the node scheme used in the database. func (db *Database) Scheme() string { return rawdb.HashScheme } + +// Reader retrieves a node reader belonging to the given state root. +func (db *Database) Reader(root common.Hash) *reader { + return &reader{db: db} +} + +// reader is a state reader of Database which implements the Reader interface. +type reader struct { + db *Database +} + +// Node retrieves the trie node with the given node hash. +// No error will be returned if the node is not found. +func (reader *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { + blob, _ := reader.db.Node(hash) + return blob, nil +} diff --git a/trie/trienode/node.go b/trie/trienode/node.go index 5ebfea889..8152eab6c 100644 --- a/trie/trienode/node.go +++ b/trie/trienode/node.go @@ -16,7 +16,13 @@ package trienode -import "github.com/ethereum/go-ethereum/common" +import ( + "fmt" + "sort" + "strings" + + "github.com/ethereum/go-ethereum/common" +) // Node is a wrapper which contains the encoded blob of the trie node and its // unique hash identifier. It is general enough that can be used to represent @@ -65,3 +71,127 @@ func NewWithPrev(hash common.Hash, blob []byte, prev []byte) *WithPrev { Prev: prev, } } + +// leaf represents a trie leaf node +type leaf struct { + Blob []byte // raw blob of leaf + Parent common.Hash // the hash of parent node +} + +// NodeSet contains a set of nodes collected during the commit operation. +// Each node is keyed by path. It's not thread-safe to use. +type NodeSet struct { + Owner common.Hash + Leaves []*leaf + Nodes map[string]*WithPrev + updates int // the count of updated and inserted nodes + deletes int // the count of deleted nodes +} + +// NewNodeSet initializes a node set. The owner is zero for the account trie and +// the owning account address hash for storage tries. +func NewNodeSet(owner common.Hash) *NodeSet { + return &NodeSet{ + Owner: owner, + Nodes: make(map[string]*WithPrev), + } +} + +// ForEachWithOrder iterates the nodes with the order from bottom to top, +// right to left, nodes with the longest path will be iterated first. +func (set *NodeSet) ForEachWithOrder(callback func(path string, n *Node)) { + var paths sort.StringSlice + for path := range set.Nodes { + paths = append(paths, path) + } + // Bottom-up, longest path first + sort.Sort(sort.Reverse(paths)) + for _, path := range paths { + callback(path, set.Nodes[path].Unwrap()) + } +} + +// AddNode adds the provided node into set. +func (set *NodeSet) AddNode(path []byte, n *WithPrev) { + if n.IsDeleted() { + set.deletes += 1 + } else { + set.updates += 1 + } + set.Nodes[string(path)] = n +} + +// AddLeaf adds the provided leaf node into set. TODO(rjl493456442) how can +// we get rid of it? +func (set *NodeSet) AddLeaf(parent common.Hash, blob []byte) { + set.Leaves = append(set.Leaves, &leaf{Blob: blob, Parent: parent}) +} + +// Size returns the number of dirty nodes in set. +func (set *NodeSet) Size() (int, int) { + return set.updates, set.deletes +} + +// Hashes returns the hashes of all updated nodes. TODO(rjl493456442) how can +// we get rid of it? +func (set *NodeSet) Hashes() []common.Hash { + var ret []common.Hash + for _, node := range set.Nodes { + ret = append(ret, node.Hash) + } + return ret +} + +// Summary returns a string-representation of the NodeSet. +func (set *NodeSet) Summary() string { + var out = new(strings.Builder) + fmt.Fprintf(out, "nodeset owner: %v\n", set.Owner) + if set.Nodes != nil { + for path, n := range set.Nodes { + // Deletion + if n.IsDeleted() { + fmt.Fprintf(out, " [-]: %x prev: %x\n", path, n.Prev) + continue + } + // Insertion + if len(n.Prev) == 0 { + fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.Hash) + continue + } + // Update + fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.Hash, n.Prev) + } + } + for _, n := range set.Leaves { + fmt.Fprintf(out, "[leaf]: %v\n", n) + } + return out.String() +} + +// MergedNodeSet represents a merged node set for a group of tries. +type MergedNodeSet struct { + Sets map[common.Hash]*NodeSet +} + +// NewMergedNodeSet initializes an empty merged set. +func NewMergedNodeSet() *MergedNodeSet { + return &MergedNodeSet{Sets: make(map[common.Hash]*NodeSet)} +} + +// NewWithNodeSet constructs a merged nodeset with the provided single set. +func NewWithNodeSet(set *NodeSet) *MergedNodeSet { + merged := NewMergedNodeSet() + merged.Merge(set) + return merged +} + +// Merge merges the provided dirty nodes of a trie into the set. The assumption +// is held that no duplicated set belonging to the same trie will be merged twice. +func (set *MergedNodeSet) Merge(other *NodeSet) error { + _, present := set.Sets[other.Owner] + if present { + return fmt.Errorf("duplicate trie for owner %#x", other.Owner) + } + set.Sets[other.Owner] = other + return nil +}