diff --git a/trie_by_cid/trie/stacktrie.go b/trie_by_cid/trie/stacktrie.go new file mode 100644 index 0000000..f2f5355 --- /dev/null +++ b/trie_by_cid/trie/stacktrie.go @@ -0,0 +1,479 @@
// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"bytes"
	"errors"
	"sync"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
)

var (
	// stPool recycles stNode objects: nodes released while hashing are put
	// back into the pool and handed out again when new nodes are created.
	stPool = sync.Pool{New: func() any { return new(stNode) }}

	// Compile-time assertion that *StackTrie satisfies types.TrieHasher.
	_ = types.TrieHasher((*StackTrie)(nil))
)

// StackTrieOptions contains the configured options for manipulating the stackTrie.
//
// All fields are optional. A nil Writer disables committing altogether; the
// Cleaner is only consulted for extension nodes that are actually committed.
type StackTrieOptions struct {
	// Writer is invoked with the node path, node hash and RLP-encoded blob
	// of every node that gets committed while hashing.
	Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes

	// Cleaner is invoked with the paths strictly inside an extension node's
	// key range, right before that extension node is written.
	Cleaner func(path []byte) // The function to clean up dangling nodes

	SkipLeftBoundary  bool          // Flag whether the nodes on the left boundary are skipped for committing
	SkipRightBoundary bool          // Flag whether the nodes on the right boundary are skipped for committing
	boundaryGauge     metrics.Gauge // Gauge to track how many boundary nodes are met
}

// NewStackTrieOptions initializes an empty options for stackTrie.
+func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} } + +// WithWriter configures trie node writer within the options. +func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions { + o.Writer = writer + return o +} + +// WithCleaner configures the cleaner in the option for removing dangling nodes. +func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions { + o.Cleaner = cleaner + return o +} + +// WithSkipBoundary configures whether the left and right boundary nodes are +// filtered for committing, along with a gauge metrics to track how many +// boundary nodes are met. +func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions { + o.SkipLeftBoundary = skipLeft + o.SkipRightBoundary = skipRight + o.boundaryGauge = gauge + return o +} + +// StackTrie is a trie implementation that expects keys to be inserted +// in order. Once it determines that a subtree will no longer be inserted +// into, it will hash it and free up the memory it uses. +type StackTrie struct { + options *StackTrieOptions + root *stNode + h *hasher + + first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary. + last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary. +} + +// NewStackTrie allocates and initializes an empty trie. +func NewStackTrie(options *StackTrieOptions) *StackTrie { + if options == nil { + options = NewStackTrieOptions() + } + return &StackTrie{ + options: options, + root: stPool.Get().(*stNode), + h: newHasher(false), + } +} + +// Update inserts a (key, value) pair into the stack trie. 
+func (t *StackTrie) Update(key, value []byte) error { + if len(value) == 0 { + return errors.New("trying to insert empty (deletion)") + } + k := keybytesToHex(key) + k = k[:len(k)-1] // chop the termination flag + if bytes.Compare(t.last, k) >= 0 { + return errors.New("non-ascending key order") + } + // track the first and last inserted entries. + if t.first == nil { + t.first = append([]byte{}, k...) + } + if t.last == nil { + t.last = append([]byte{}, k...) // allocate key slice + } else { + t.last = append(t.last[:0], k...) // reuse key slice + } + t.insert(t.root, k, value, nil) + return nil +} + +// MustUpdate is a wrapper of Update and will omit any encountered error but +// just print out an error message. +func (t *StackTrie) MustUpdate(key, value []byte) { + if err := t.Update(key, value); err != nil { + log.Error("Unhandled trie error in StackTrie.Update", "err", err) + } +} + +// Reset resets the stack trie object to empty state. +func (t *StackTrie) Reset() { + t.options = NewStackTrieOptions() + t.root = stPool.Get().(*stNode) + t.first = nil + t.last = nil +} + +// stNode represents a node within a StackTrie +type stNode struct { + typ uint8 // node type (as in branch, ext, leaf) + key []byte // key chunk covered by this (leaf|ext) node + val []byte // value contained by this node if it's a leaf + children [16]*stNode // list of children (for branch and exts) +} + +// newLeaf constructs a leaf node with provided node key and value. The key +// will be deep-copied in the function and safe to modify afterwards, but +// value is not. +func newLeaf(key, val []byte) *stNode { + st := stPool.Get().(*stNode) + st.typ = leafNode + st.key = append(st.key, key...) + st.val = val + return st +} + +// newExt constructs an extension node with provided node key and child. The +// key will be deep-copied in the function and safe to modify afterwards. 
func newExt(key []byte, child *stNode) *stNode {
	st := stPool.Get().(*stNode)
	st.typ = extNode
	st.key = append(st.key, key...)
	st.children[0] = child // an extension keeps its single child in slot 0
	return st
}

// List all values that stNode.typ can hold.
const (
	emptyNode = iota
	branchNode
	extNode
	leafNode
	hashedNode
)

// reset clears the node (key length, value, children and type) so that it can
// be handed back to the pool. The key buffer is truncated, not dropped, so
// its capacity is kept. The receiver is returned for call chaining.
func (n *stNode) reset() *stNode {
	n.key = n.key[:0]
	n.val = nil
	for i := range n.children {
		n.children[i] = nil
	}
	n.typ = emptyNode
	return n
}

// getDiffIndex returns the index of the first nibble at which the node's key
// fragment differs from the given key, or len(n.key) if the whole fragment is
// a prefix of key.
//
// The key must be at least as long as the compared part of the fragment;
// there is no bounds check on key.
func (n *stNode) getDiffIndex(key []byte) int {
	for idx, nibble := range n.key {
		if nibble != key[idx] {
			return idx
		}
	}
	return len(n.key)
}

// insert is the helper that inserts a (key, value) pair into the subtrie
// rooted at st.
//
// key is the remaining hex-nibble key relative to st, and path is the nibble
// path leading from the root to st; the path is only forwarded to hash when
// a finished sibling subtree is hashed. The function panics when the key
// already exists in a leaf, when st has already been hashed, or when st has
// an unknown type.
func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
	switch st.typ {
	case branchNode: /* Branch */
		idx := int(key[0])

		// Unresolve elder siblings: only the closest populated slot to the
		// left needs to be looked at, it is hashed unless that has been done
		// already.
		for i := idx - 1; i >= 0; i-- {
			if st.children[i] != nil {
				if st.children[i].typ != hashedNode {
					t.hash(st.children[i], append(path, byte(i)))
				}
				break
			}
		}

		// Add new child
		if st.children[idx] == nil {
			st.children[idx] = newLeaf(key[1:], value)
		} else {
			t.insert(st.children[idx], key[1:], value, append(path, key[0]))
		}

	case extNode: /* Ext */
		// Compare both key chunks and see where they differ
		diffidx := st.getDiffIndex(key)

		// Check if chunks are identical. If so, recurse into
		// the child node. Otherwise, the key has to be split
		// into 1) an optional common prefix, 2) the fullnode
		// representing the two differing path, and 3) a leaf
		// for each of the differentiated subtrees.
		if diffidx == len(st.key) {
			// Ext key and key segment are identical, recurse into
			// the child node.
			t.insert(st.children[0], key[diffidx:], value, append(path, key[:diffidx]...))
			return
		}
		// Save the original part. Depending if the break is
		// at the extension's last byte or not, create an
		// intermediate extension or use the extension's child
		// node directly.
		var n *stNode
		if diffidx < len(st.key)-1 {
			// Break on the non-last byte, insert an intermediate
			// extension. The path prefix of the newly-inserted
			// extension should also contain the different byte.
			n = newExt(st.key[diffidx+1:], st.children[0])
			t.hash(n, append(path, st.key[:diffidx+1]...))
		} else {
			// Break on the last byte, no need to insert
			// an extension node: reuse the current node.
			// The path prefix of the original part should
			// still be same.
			n = st.children[0]
			t.hash(n, append(path, st.key...))
		}
		var p *stNode
		if diffidx == 0 {
			// the break is on the first byte, so
			// the current node is converted into
			// a branch node.
			st.children[0] = nil
			p = st
			st.typ = branchNode
		} else {
			// the common prefix is at least one byte
			// long, insert a new intermediate branch
			// node.
			st.children[0] = stPool.Get().(*stNode)
			st.children[0].typ = branchNode
			p = st.children[0]
		}
		// Create a leaf for the inserted part
		o := newLeaf(key[diffidx+1:], value)

		// Insert both child leaves where they belong:
		origIdx := st.key[diffidx]
		newIdx := key[diffidx]
		p.children[origIdx] = n
		p.children[newIdx] = o
		// Keep only the common prefix; this must happen last because the
		// statements above still read the full key.
		st.key = st.key[:diffidx]

	case leafNode: /* Leaf */
		// Compare both key chunks and see where they differ
		diffidx := st.getDiffIndex(key)

		// Overwriting a key isn't supported, which means that
		// the current leaf is expected to be split into 1) an
		// optional extension for the common prefix of these 2
		// keys, 2) a fullnode selecting the path on which the
		// keys differ, and 3) one leaf for the differentiated
		// component of each key.
		if diffidx >= len(st.key) {
			panic("Trying to insert into existing key")
		}

		// Check if the split occurs at the first nibble of the
		// chunk. In that case, no prefix extnode is necessary.
		// Otherwise, create one.
		var p *stNode
		if diffidx == 0 {
			// Convert current leaf into a branch
			st.typ = branchNode
			p = st
			st.children[0] = nil
		} else {
			// Convert current node into an ext,
			// and insert a child branch node.
			st.typ = extNode
			st.children[0] = stPool.Get().(*stNode)
			st.children[0].typ = branchNode
			p = st.children[0]
		}

		// Create the two child leaves: one containing the original
		// value and another containing the new value. The child leaf
		// is hashed directly in order to free up some memory.
		origIdx := st.key[diffidx]
		p.children[origIdx] = newLeaf(st.key[diffidx+1:], st.val)
		t.hash(p.children[origIdx], append(path, st.key[:diffidx+1]...))

		newIdx := key[diffidx]
		p.children[newIdx] = newLeaf(key[diffidx+1:], value)

		// Finally, cut off the key part that has been passed
		// over to the children.
		st.key = st.key[:diffidx]
		st.val = nil

	case emptyNode: /* Empty */
		// First insertion into an empty node: it simply becomes a leaf.
		// Note that key and value are stored without copying.
		st.typ = leafNode
		st.key = key
		st.val = value

	case hashedNode:
		panic("trying to insert into hash")

	default:
		panic("invalid type")
	}
}

// hash converts st into a 'hashedNode', if possible. Possible outcomes:
//
// 1. The rlp-encoded value was >= 32 bytes:
//   - Then the 32-byte `hash` will be accessible in `st.val`.
//   - And the 'st.type' will be 'hashedNode'
//
// 2. The rlp-encoded value was < 32 bytes
//   - Then the <32 byte rlp-encoded value will be accessible in 'st.val'.
//   - And the 'st.type' will be 'hashedNode' AGAIN
//
// This method also sets 'st.type' to hashedNode, and clears 'st.key'.
func (t *StackTrie) hash(st *stNode, path []byte) {
	var (
		blob     []byte   // RLP-encoded node blob
		internal [][]byte // List of node paths covered by the extension node
	)
	switch st.typ {
	case hashedNode:
		// Already hashed, nothing left to do.
		return

	case emptyNode:
		// An empty node hashes to the well-known empty root hash and is
		// never handed to the writer.
		st.val = types.EmptyRootHash.Bytes()
		st.key = st.key[:0]
		st.typ = hashedNode
		return

	case branchNode:
		var nodes fullNode
		for i, child := range st.children {
			if child == nil {
				nodes.Children[i] = nilValueNode
				continue
			}
			t.hash(child, append(path, byte(i)))

			// A hashed child carries either its inlined encoding (shorter
			// than 32 bytes) or its hash in val.
			if len(child.val) < 32 {
				nodes.Children[i] = rawNode(child.val)
			} else {
				nodes.Children[i] = hashNode(child.val)
			}
			st.children[i] = nil
			stPool.Put(child.reset()) // Release child back to pool.
		}
		nodes.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

	case extNode:
		// recursively hash and commit child as the first step
		t.hash(st.children[0], append(path, st.key...))

		// Collect the path of internal nodes between shortNode and its **in disk**
		// child. This is essential in the case of path mode scheme to avoid leaving
		// dangling nodes within the range of this internal path on disk, which would
		// break the guarantee for state healing.
		if len(st.children[0].val) >= 32 && t.options.Cleaner != nil {
			for i := 1; i < len(st.key); i++ {
				internal = append(internal, append(path, st.key[:i]...))
			}
		}
		// encode the extension node; the key buffer is converted in place,
		// which is why the internal paths had to be collected beforehand.
		n := shortNode{Key: hexToCompactInPlace(st.key)}
		if len(st.children[0].val) < 32 {
			n.Val = rawNode(st.children[0].val)
		} else {
			n.Val = hashNode(st.children[0].val)
		}
		n.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

		stPool.Put(st.children[0].reset()) // Release child back to pool.
		st.children[0] = nil

	case leafNode:
		// Append nibble 16 before compact-encoding the key in place;
		// presumably this is the terminator flag that Update chopped off.
		st.key = append(st.key, byte(16))
		n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)}

		n.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

	default:
		panic("invalid node type")
	}

	st.typ = hashedNode
	st.key = st.key[:0]

	// Skip committing the non-root node if the size is smaller than 32 bytes.
	// The blob is copied, presumably because it aliases the hasher's buffer
	// and would otherwise be overwritten by the next encoding.
	if len(blob) < 32 && len(path) > 0 {
		st.val = common.CopyBytes(blob)
		return
	}
	// Write the hash to the 'val'. We allocate a new val here to not mutate
	// input values.
	st.val = t.h.hashData(blob)

	// Short circuit if the stack trie is not configured for writing.
	if t.options.Writer == nil {
		return
	}
	// Skip committing if the node is on the left boundary and stackTrie is
	// configured to filter the boundary. A node is on the boundary when its
	// path is a prefix of the first inserted key.
	if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) {
		if t.options.boundaryGauge != nil {
			t.options.boundaryGauge.Inc(1)
		}
		return
	}
	// Skip committing if the node is on the right boundary and stackTrie is
	// configured to filter the boundary. A node is on the boundary when its
	// path is a prefix of the last inserted key.
	if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) {
		if t.options.boundaryGauge != nil {
			t.options.boundaryGauge.Inc(1)
		}
		return
	}
	// Clean up the internal dangling nodes covered by the extension node.
	// This should be done before writing the node to adhere to the committing
	// order from bottom to top.
	for _, path := range internal {
		t.options.Cleaner(path)
	}
	t.options.Writer(path, common.BytesToHash(st.val), blob)
}

// Hash will firstly hash the entire trie if it's still not hashed and then commit
// all nodes to the associated database. Actually most of the trie nodes have been
// committed already. The main purpose here is to commit the nodes on right boundary.
//
// For stack trie, Hash and Commit are functionally identical.
+func (t *StackTrie) Hash() common.Hash { + n := t.root + t.hash(n, nil) + return common.BytesToHash(n.val) +} + +// Commit will firstly hash the entire trie if it's still not hashed and then commit +// all nodes to the associated database. Actually most of the trie nodes have been +// committed already. The main purpose here is to commit the nodes on right boundary. +// +// For stack trie, Hash and Commit are functionally identical. +func (t *StackTrie) Commit() common.Hash { + return t.Hash() +} diff --git a/trie_by_cid/trie/stacktrie_fuzzer_test.go b/trie_by_cid/trie/stacktrie_fuzzer_test.go new file mode 100644 index 0000000..171dc5a --- /dev/null +++ b/trie_by_cid/trie/stacktrie_fuzzer_test.go @@ -0,0 +1,156 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package trie + +import ( + "bytes" + "encoding/binary" + "fmt" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "golang.org/x/crypto/sha3" + "golang.org/x/exp/slices" + + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/trienode" +) + +func FuzzStackTrie(f *testing.F) { + f.Fuzz(func(t *testing.T, data []byte) { + fuzz(data, false) + }) +} + +func fuzz(data []byte, debugging bool) { + // This spongeDb is used to check the sequence of disk-db-writes + var ( + input = bytes.NewReader(data) + spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()} + dbA = newTestDatabase(rawdb.NewDatabase(spongeA), rawdb.HashScheme) + trieA = NewEmpty(dbA) + spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} + dbB = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme) + + options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme()) + }) + trieB = NewStackTrie(options) + vals []*kv + maxElements = 10000 + // operate on unique keys only + keys = make(map[string]struct{}) + ) + // Fill the trie with elements + for i := 0; input.Len() > 0 && i < maxElements; i++ { + k := make([]byte, 32) + input.Read(k) + var a uint16 + binary.Read(input, binary.LittleEndian, &a) + a = 1 + a%100 + v := make([]byte, a) + input.Read(v) + if input.Len() == 0 { + // If it was exhausted while reading, the value may be all zeroes, + // thus 'deletion' which is not supported on stacktrie + break + } + if _, present := keys[string(k)]; present { + // This key is a duplicate, ignore it + continue + } + keys[string(k)] = struct{}{} + vals = append(vals, &kv{k: k, v: v}) + trieA.MustUpdate(k, v) + } + if len(vals) == 0 { + return + } + // Flush trie -> database + rootA, nodes, err := trieA.Commit(false) + if err != nil { + panic(err) + } + if 
nodes != nil { + dbA.Update(rootA, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) + } + // Flush memdb -> disk (sponge) + dbA.Commit(rootA) + + // Stacktrie requires sorted insertion + slices.SortFunc(vals, (*kv).cmp) + + for _, kv := range vals { + if debugging { + fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v) + } + trieB.MustUpdate(kv.k, kv.v) + } + rootB := trieB.Hash() + trieB.Commit() + if rootA != rootB { + panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) + } + sumA := spongeA.sponge.Sum(nil) + sumB := spongeB.sponge.Sum(nil) + if !bytes.Equal(sumA, sumB) { + panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB)) + } + + // Ensure all the nodes are persisted correctly + var ( + nodeset = make(map[string][]byte) // path -> blob + optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + if crypto.Keccak256Hash(blob) != hash { + panic("invalid node blob") + } + nodeset[string(path)] = common.CopyBytes(blob) + }) + trieC = NewStackTrie(optionsC) + checked int + ) + for _, kv := range vals { + trieC.MustUpdate(kv.k, kv.v) + } + rootC := trieC.Commit() + if rootA != rootC { + panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC)) + } + trieA, _ = New(TrieID(rootA), dbA) + iterA := trieA.MustNodeIterator(nil) + for iterA.Next(true) { + if iterA.Hash() == (common.Hash{}) { + if _, present := nodeset[string(iterA.Path())]; present { + panic("unexpected tiny node") + } + continue + } + nodeBlob, present := nodeset[string(iterA.Path())] + if !present { + panic("missing node") + } + if !bytes.Equal(nodeBlob, iterA.NodeBlob()) { + panic("node blob is not matched") + } + checked += 1 + } + if checked != len(nodeset) { + panic("node number is not matched") + } +} diff --git a/trie_by_cid/trie/stacktrie_test.go b/trie_by_cid/trie/stacktrie_test.go new file mode 100644 index 0000000..d6094ed --- /dev/null +++ 
b/trie_by_cid/trie/stacktrie_test.go @@ -0,0 +1,488 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "bytes" + "math/big" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/stretchr/testify/assert" + "golang.org/x/exp/slices" + + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/testutil" +) + +func TestStackTrieInsertAndHash(t *testing.T) { + type KeyValueHash struct { + K string // Hex string for key. + V string // Value, directly converted to bytes. + H string // Expected root hash after insert of (K, V) to an existing trie. 
+ } + tests := [][]KeyValueHash{ + { // {0:0, 7:0, f:0} + {"00", "v_______________________0___0", "5cb26357b95bb9af08475be00243ceb68ade0b66b5cd816b0c18a18c612d2d21"}, + {"70", "v_______________________0___1", "8ff64309574f7a437a7ad1628e690eb7663cfde10676f8a904a8c8291dbc1603"}, + {"f0", "v_______________________0___2", "9e3a01bd8d43efb8e9d4b5506648150b8e3ed1caea596f84ee28e01a72635470"}, + }, + { // {1:0cc, e:{1:fc, e:fc}} + {"10cc", "v_______________________1___0", "233e9b257843f3dfdb1cce6676cdaf9e595ac96ee1b55031434d852bc7ac9185"}, + {"e1fc", "v_______________________1___1", "39c5e908ae83d0c78520c7c7bda0b3782daf594700e44546e93def8f049cca95"}, + {"eefc", "v_______________________1___2", "d789567559fd76fe5b7d9cc42f3750f942502ac1c7f2a466e2f690ec4b6c2a7c"}, + }, + { // {b:{a:ac, b:ac}, d:acc} + {"baac", "v_______________________2___0", "8be1c86ba7ec4c61e14c1a9b75055e0464c2633ae66a055a24e75450156a5d42"}, + {"bbac", "v_______________________2___1", "8495159b9895a7d88d973171d737c0aace6fe6ac02a4769fff1bc43bcccce4cc"}, + {"dacc", "v_______________________2___2", "9bcfc5b220a27328deb9dc6ee2e3d46c9ebc9c69e78acda1fa2c7040602c63ca"}, + }, + { // {0:0cccc, 2:456{0:0, 2:2} + {"00cccc", "v_______________________3___0", "e57dc2785b99ce9205080cb41b32ebea7ac3e158952b44c87d186e6d190a6530"}, + {"245600", "v_______________________3___1", "0335354adbd360a45c1871a842452287721b64b4234dfe08760b243523c998db"}, + {"245622", "v_______________________3___2", "9e6832db0dca2b5cf81c0e0727bfde6afc39d5de33e5720bccacc183c162104e"}, + }, + { // {1:4567{1:1c, 3:3c}, 3:0cccccc} + {"1456711c", "v_______________________4___0", "f2389e78d98fed99f3e63d6d1623c1d4d9e8c91cb1d585de81fbc7c0e60d3529"}, + {"1456733c", "v_______________________4___1", "101189b3fab852be97a0120c03d95eefcf984d3ed639f2328527de6def55a9c0"}, + {"30cccccc", "v_______________________4___2", "3780ce111f98d15751dfde1eb21080efc7d3914b429e5c84c64db637c55405b3"}, + }, + { // 8800{1:f, 2:e, 3:d} + {"88001f", "v_______________________5___0", 
"e817db50d84f341d443c6f6593cafda093fc85e773a762421d47daa6ac993bd5"}, + {"88002e", "v_______________________5___1", "d6e3e6047bdc110edd296a4d63c030aec451bee9d8075bc5a198eee8cda34f68"}, + {"88003d", "v_______________________5___2", "b6bdf8298c703342188e5f7f84921a402042d0e5fb059969dd53a6b6b1fb989e"}, + }, + { // 0{1:fc, 2:ec, 4:dc} + {"01fc", "v_______________________6___0", "693268f2ca80d32b015f61cd2c4dba5a47a6b52a14c34f8e6945fad684e7a0d5"}, + {"02ec", "v_______________________6___1", "e24ddd44469310c2b785a2044618874bf486d2f7822603a9b8dce58d6524d5de"}, + {"04dc", "v_______________________6___2", "33fc259629187bbe54b92f82f0cd8083b91a12e41a9456b84fc155321e334db7"}, + }, + { // f{0:fccc, f:ff{0:f, f:f}} + {"f0fccc", "v_______________________7___0", "b0966b5aa469a3e292bc5fcfa6c396ae7a657255eef552ea7e12f996de795b90"}, + {"ffff0f", "v_______________________7___1", "3b1ca154ec2a3d96d8d77bddef0abfe40a53a64eb03cecf78da9ec43799fa3d0"}, + {"ffffff", "v_______________________7___2", "e75463041f1be8252781be0ace579a44ea4387bf5b2739f4607af676f7719678"}, + }, + { // ff{0:f{0:f, f:f}, f:fcc} + {"ff0f0f", "v_______________________8___0", "0928af9b14718ec8262ab89df430f1e5fbf66fac0fed037aff2b6767ae8c8684"}, + {"ff0fff", "v_______________________8___1", "d870f4d3ce26b0bf86912810a1960693630c20a48ba56be0ad04bc3e9ddb01e6"}, + {"ffffcc", "v_______________________8___2", "4239f10dd9d9915ecf2e047d6a576bdc1733ed77a30830f1bf29deaf7d8e966f"}, + }, + { + {"123d", "x___________________________0", "fc453d88b6f128a77c448669710497380fa4588abbea9f78f4c20c80daa797d0"}, + {"123e", "x___________________________1", "5af48f2d8a9a015c1ff7fa8b8c7f6b676233bd320e8fb57fd7933622badd2cec"}, + {"123f", "x___________________________2", "1164d7299964e74ac40d761f9189b2a3987fae959800d0f7e29d3aaf3eae9e15"}, + }, + { + {"123d", "x___________________________0", "fc453d88b6f128a77c448669710497380fa4588abbea9f78f4c20c80daa797d0"}, + {"123e", "x___________________________1", 
"5af48f2d8a9a015c1ff7fa8b8c7f6b676233bd320e8fb57fd7933622badd2cec"}, + {"124a", "x___________________________2", "661a96a669869d76b7231380da0649d013301425fbea9d5c5fae6405aa31cfce"}, + }, + { + {"123d", "x___________________________0", "fc453d88b6f128a77c448669710497380fa4588abbea9f78f4c20c80daa797d0"}, + {"123e", "x___________________________1", "5af48f2d8a9a015c1ff7fa8b8c7f6b676233bd320e8fb57fd7933622badd2cec"}, + {"13aa", "x___________________________2", "6590120e1fd3ffd1a90e8de5bb10750b61079bb0776cca4414dd79a24e4d4356"}, + }, + { + {"123d", "x___________________________0", "fc453d88b6f128a77c448669710497380fa4588abbea9f78f4c20c80daa797d0"}, + {"123e", "x___________________________1", "5af48f2d8a9a015c1ff7fa8b8c7f6b676233bd320e8fb57fd7933622badd2cec"}, + {"2aaa", "x___________________________2", "f869b40e0c55eace1918332ef91563616fbf0755e2b946119679f7ef8e44b514"}, + }, + { + {"1234da", "x___________________________0", "1c4b4462e9f56a80ca0f5d77c0d632c41b0102290930343cf1791e971a045a79"}, + {"1234ea", "x___________________________1", "2f502917f3ba7d328c21c8b45ee0f160652e68450332c166d4ad02d1afe31862"}, + {"1234fa", "x___________________________2", "4f4e368ab367090d5bc3dbf25f7729f8bd60df84de309b4633a6b69ab66142c0"}, + }, + { + {"1234da", "x___________________________0", "1c4b4462e9f56a80ca0f5d77c0d632c41b0102290930343cf1791e971a045a79"}, + {"1234ea", "x___________________________1", "2f502917f3ba7d328c21c8b45ee0f160652e68450332c166d4ad02d1afe31862"}, + {"1235aa", "x___________________________2", "21840121d11a91ac8bbad9a5d06af902a5c8d56a47b85600ba813814b7bfcb9b"}, + }, + { + {"1234da", "x___________________________0", "1c4b4462e9f56a80ca0f5d77c0d632c41b0102290930343cf1791e971a045a79"}, + {"1234ea", "x___________________________1", "2f502917f3ba7d328c21c8b45ee0f160652e68450332c166d4ad02d1afe31862"}, + {"124aaa", "x___________________________2", "ea4040ddf6ae3fbd1524bdec19c0ab1581015996262006632027fa5cf21e441e"}, + }, + { + {"1234da", "x___________________________0", 
"1c4b4462e9f56a80ca0f5d77c0d632c41b0102290930343cf1791e971a045a79"}, + {"1234ea", "x___________________________1", "2f502917f3ba7d328c21c8b45ee0f160652e68450332c166d4ad02d1afe31862"}, + {"13aaaa", "x___________________________2", "e4beb66c67e44f2dd8ba36036e45a44ff68f8d52942472b1911a45f886a34507"}, + }, + { + {"1234da", "x___________________________0", "1c4b4462e9f56a80ca0f5d77c0d632c41b0102290930343cf1791e971a045a79"}, + {"1234ea", "x___________________________1", "2f502917f3ba7d328c21c8b45ee0f160652e68450332c166d4ad02d1afe31862"}, + {"2aaaaa", "x___________________________2", "5f5989b820ff5d76b7d49e77bb64f26602294f6c42a1a3becc669cd9e0dc8ec9"}, + }, + { + {"000000", "x___________________________0", "3b32b7af0bddc7940e7364ee18b5a59702c1825e469452c8483b9c4e0218b55a"}, + {"1234da", "x___________________________1", "3ab152a1285dca31945566f872c1cc2f17a770440eda32aeee46a5e91033dde2"}, + {"1234ea", "x___________________________2", "0cccc87f96ddef55563c1b3be3c64fff6a644333c3d9cd99852cb53b6412b9b8"}, + {"1234fa", "x___________________________3", "65bb3aafea8121111d693ffe34881c14d27b128fd113fa120961f251fe28428d"}, + }, + { + {"000000", "x___________________________0", "3b32b7af0bddc7940e7364ee18b5a59702c1825e469452c8483b9c4e0218b55a"}, + {"1234da", "x___________________________1", "3ab152a1285dca31945566f872c1cc2f17a770440eda32aeee46a5e91033dde2"}, + {"1234ea", "x___________________________2", "0cccc87f96ddef55563c1b3be3c64fff6a644333c3d9cd99852cb53b6412b9b8"}, + {"1235aa", "x___________________________3", "f670e4d2547c533c5f21e0045442e2ecb733f347ad6d29ef36e0f5ba31bb11a8"}, + }, + { + {"000000", "x___________________________0", "3b32b7af0bddc7940e7364ee18b5a59702c1825e469452c8483b9c4e0218b55a"}, + {"1234da", "x___________________________1", "3ab152a1285dca31945566f872c1cc2f17a770440eda32aeee46a5e91033dde2"}, + {"1234ea", "x___________________________2", "0cccc87f96ddef55563c1b3be3c64fff6a644333c3d9cd99852cb53b6412b9b8"}, + {"124aaa", "x___________________________3", 
"c17464123050a9a6f29b5574bb2f92f6d305c1794976b475b7fb0316b6335598"}, + }, + { + {"000000", "x___________________________0", "3b32b7af0bddc7940e7364ee18b5a59702c1825e469452c8483b9c4e0218b55a"}, + {"1234da", "x___________________________1", "3ab152a1285dca31945566f872c1cc2f17a770440eda32aeee46a5e91033dde2"}, + {"1234ea", "x___________________________2", "0cccc87f96ddef55563c1b3be3c64fff6a644333c3d9cd99852cb53b6412b9b8"}, + {"13aaaa", "x___________________________3", "aa8301be8cb52ea5cd249f5feb79fb4315ee8de2140c604033f4b3fff78f0105"}, + }, + { + {"0000", "x___________________________0", "cb8c09ad07ae882136f602b3f21f8733a9f5a78f1d2525a8d24d1c13258000b2"}, + {"123d", "x___________________________1", "8f09663deb02f08958136410dc48565e077f76bb6c9d8c84d35fc8913a657d31"}, + {"123e", "x___________________________2", "0d230561e398c579e09a9f7b69ceaf7d3970f5a436fdb28b68b7a37c5bdd6b80"}, + {"123f", "x___________________________3", "80f7bad1893ca57e3443bb3305a517723a74d3ba831bcaca22a170645eb7aafb"}, + }, + { + {"0000", "x___________________________0", "cb8c09ad07ae882136f602b3f21f8733a9f5a78f1d2525a8d24d1c13258000b2"}, + {"123d", "x___________________________1", "8f09663deb02f08958136410dc48565e077f76bb6c9d8c84d35fc8913a657d31"}, + {"123e", "x___________________________2", "0d230561e398c579e09a9f7b69ceaf7d3970f5a436fdb28b68b7a37c5bdd6b80"}, + {"124a", "x___________________________3", "383bc1bb4f019e6bc4da3751509ea709b58dd1ac46081670834bae072f3e9557"}, + }, + { + {"0000", "x___________________________0", "cb8c09ad07ae882136f602b3f21f8733a9f5a78f1d2525a8d24d1c13258000b2"}, + {"123d", "x___________________________1", "8f09663deb02f08958136410dc48565e077f76bb6c9d8c84d35fc8913a657d31"}, + {"123e", "x___________________________2", "0d230561e398c579e09a9f7b69ceaf7d3970f5a436fdb28b68b7a37c5bdd6b80"}, + {"13aa", "x___________________________3", "ff0dc70ce2e5db90ee42a4c2ad12139596b890e90eb4e16526ab38fa465b35cf"}, + }, + { // branch node with short values + {"01", "a", 
"b48605025f5f4b129d40a420e721aa7d504487f015fce85b96e52126365ef7dc"}, + {"80", "b", "2dc6b680daf74db067cb7aeaad73265ded93d96fce190fcbf64f498d475672ab"}, + {"ee", "c", "017dc705a54ac5328dd263fa1bae68d655310fb3e3f7b7bc57e9a43ddf99c4bf"}, + {"ff", "d", "bd5a3584d271d459bd4eb95247b2fc88656b3671b60c1125ffe7bc0b689470d0"}, + }, + { // ext node with short branch node, then becoming long + {"a0", "a", "a83e028cb1e4365935661a9fd36a5c65c30b9ab416eaa877424146ca2a69d088"}, + {"a1", "b", "f586a4639b07b01798ca65e05c253b75d51135ebfbf6f8d6e87c0435089e65f0"}, + {"a2", "c", "63e297c295c008e09a8d531e18d57f270b6bc403e23179b915429db948cd62e3"}, + {"a3", "d", "94a7b721535578e9381f1f4e4b6ec29f8bdc5f0458a30320684c562f5d47b4b5"}, + {"a4", "e", "4b7e66d1c81965cdbe8fab8295ef56bc57fefdc5733d4782d2f8baf630f083c6"}, + {"a5", "f", "2997e7b502198ce1783b5277faacf52b25844fb55a99b63e88bdbbafac573106"}, + {"a6", "g", "bee629dd27a40772b2e1a67ec6db270d26acdf8d3b674dfae27866ad6ae1f48b"}, + }, + { // branch node with short values, then long ones + {"a001", "v1", "b9cc982d995392b51e6787f1915f0b88efd4ad8b30f138da0a3e2242f2323e35"}, + {"b002", "v2", "a7b474bc77ef5097096fa0ee6298fdae8928c0bc3724e7311cd0fa9ed1942fc7"}, + {"c003", "v___________________________3", "dceb5bb7c92b0e348df988a8d9fc36b101397e38ebd405df55ba6ee5f14a264a"}, + {"d004", "v___________________________4", "36e60ecb86b9626165e1c6543c42ecbe4d83bca58e8e1124746961511fce362a"}, + }, + { // ext node to branch node with short values, then long ones + {"8002", "v1", "3258fcb3e9e7d7234ecd3b8d4743999e4ab3a21592565e0a5ca64c141e8620d9"}, + {"8004", "v2", "b6cb95b7024a83c17624a3c9bed09b4b5e8ed426f49f54b8ad13c39028b1e75a"}, + {"8008", "v___________________________3", "c769d82963abe6f0900bf69754738eeb2f84559777cfa87a44f54e1aab417871"}, + {"800d", "v___________________________4", "1cad1fdaab1a6fa95d7b780fd680030e423eb76669971368ba04797a8d9cdfc9"}, + }, + { // ext node with a child of size 31 (Y) and branch node with a child of size 31 (X) + {"000001", 
"ZZZZZZZZZ", "cef154b87c03c563408520ff9b26923c360cbc3ddb590c079bedeeb25a8c9c77"}, + {"000002", "Y", "2130735e600f612f6e657a32bd7be64ddcaec6512c5694844b19de713922895d"}, + {"000003", "XXXXXXXXXXXXXXXXXXXXXXXXXXXX", "962c0fffdeef7612a4f7bff1950d67e3e81c878e48b9ae45b3b374253b050bd8"}, + }, + } + for i, test := range tests { + // The StackTrie does not allow Insert(), Hash(), Insert(), ... + // so we will create new trie for every sequence length of inserts. + for l := 1; l <= len(test); l++ { + st := NewStackTrie(nil) + for j := 0; j < l; j++ { + kv := &test[j] + if err := st.Update(common.FromHex(kv.K), []byte(kv.V)); err != nil { + t.Fatal(err) + } + } + expected := common.HexToHash(test[l-1].H) + if h := st.Hash(); h != expected { + t.Errorf("%d(%d): root hash mismatch: %x, expected %x", i, l, h, expected) + } + } + } +} + +func TestSizeBug(t *testing.T) { + st := NewStackTrie(nil) + nt := NewEmpty(newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)) + + leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") + value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") + + nt.Update(leaf, value) + st.Update(leaf, value) + + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} + +func TestEmptyBug(t *testing.T) { + st := NewStackTrie(nil) + nt := NewEmpty(newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)) + + //leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") + //value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") + kvs := []struct { + K string + V string + }{ + {K: "405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ace", V: "9496f4ec2bf9dab484cac6be589e8417d84781be08"}, + {K: "40edb63a35fcf86c08022722aa3287cdd36440d671b4918131b2514795fefa9c", V: "01"}, + {K: "b10e2d527612073b26eecdfd717e6a320cf44b4afac2b0732d9fcbe2b7fa0cf6", V: "947a30f7736e48d6599356464ba4c150d8da0302ff"}, + {K: 
"c2575a0e9e593c00f959f8c92f12db2869c3395a3b0502d05e2516446f71f85b", V: "02"}, + } + + for _, kv := range kvs { + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + } + + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} + +func TestValLength56(t *testing.T) { + st := NewStackTrie(nil) + nt := NewEmpty(newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)) + + //leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") + //value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") + kvs := []struct { + K string + V string + }{ + {K: "405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3bb5ace", V: "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111"}, + } + + for _, kv := range kvs { + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + } + + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} + +// TestUpdateSmallNodes tests a case where the leaves are small (both key and value), +// which causes a lot of node-within-node. This case was found via fuzzing. 
+func TestUpdateSmallNodes(t *testing.T) { + st := NewStackTrie(nil) + nt := NewEmpty(newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)) + kvs := []struct { + K string + V string + }{ + {"63303030", "3041"}, // stacktrie.Update + {"65", "3000"}, // stacktrie.Update + } + for _, kv := range kvs { + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + } + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} + +// TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different +// sizes are used, and the second one has the same prefix as the first, then the +// stacktrie fails, since it's unable to 'expand' on an already added leaf. +// For all practical purposes, this is fine, since keys are fixed-size length +// in account and storage tries. +// +// The test is marked as 'skipped', and exists just to have the behaviour documented. +// This case was found via fuzzing. 
+func TestUpdateVariableKeys(t *testing.T) { + t.SkipNow() + st := NewStackTrie(nil) + nt := NewEmpty(newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)) + kvs := []struct { + K string + V string + }{ + {"0x33303534636532393561313031676174", "303030"}, + {"0x3330353463653239356131303167617430", "313131"}, + } + for _, kv := range kvs { + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + } + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} + +// TestStacktrieNotModifyValues checks that inserting blobs of data into the +// stacktrie does not mutate the blobs +func TestStacktrieNotModifyValues(t *testing.T) { + st := NewStackTrie(nil) + { // Test a very small trie + // Give it the value as a slice with large backing alloc, + // so if the stacktrie tries to append, it won't have to realloc + value := make([]byte, 1, 100) + value[0] = 0x2 + want := common.CopyBytes(value) + st.Update([]byte{0x01}, value) + st.Hash() + if have := value; !bytes.Equal(have, want) { + t.Fatalf("tiny trie: have %#x want %#x", have, want) + } + st = NewStackTrie(nil) + } + // Test with a larger trie + keyB := big.NewInt(1) + keyDelta := big.NewInt(1) + var vals [][]byte + getValue := func(i int) []byte { + if i%2 == 0 { // large + return crypto.Keccak256(big.NewInt(int64(i)).Bytes()) + } else { //small + return big.NewInt(int64(i)).Bytes() + } + } + for i := 0; i < 1000; i++ { + key := common.BigToHash(keyB) + value := getValue(i) + st.Update(key.Bytes(), value) + vals = append(vals, value) + keyB = keyB.Add(keyB, keyDelta) + keyDelta.Add(keyDelta, common.Big1) + } + st.Hash() + for i := 0; i < 1000; i++ { + want := getValue(i) + + have := vals[i] + if !bytes.Equal(have, want) { + t.Fatalf("item %d, have %#x want %#x", i, have, want) + } + } +} + +func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash { + var ( + options = NewStackTrieOptions() + nodes = 
make(map[string]common.Hash) + ) + var ( + first int + last = len(entries) - 1 + + noLeft bool + noRight bool + ) + // Enter split mode if there are at least two elements + if rand.Intn(5) != 0 { + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + if first != 0 { + noLeft = true + } + if last != len(entries)-1 { + noRight = true + } + } + options = options.WithSkipBoundary(noLeft, noRight, nil) + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + tr := NewStackTrie(options) + + for i := first; i <= last; i++ { + tr.MustUpdate(entries[i].k, entries[i].v) + } + tr.Commit() + return nodes +} + +func TestPartialStackTrie(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(100) + 1 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testutil.RandBytes(3) + } else { + val = testutil.RandBytes(32) + } + entries = append(entries, &kv{ + k: testutil.RandBytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + var ( + nodes = make(map[string]common.Hash) + options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + ) + tr := NewStackTrie(options) + + for i := 0; i < len(entries); i++ { + tr.MustUpdate(entries[i].k, entries[i].v) + } + tr.Commit() + + for j := 0; j < 100; j++ { + for path, hash := range buildPartialTree(entries, t) { + if nodes[path] != hash { + t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash) + } + } + } + } +} + +func TestStackTrieErrors(t *testing.T) { + s := NewStackTrie(nil) + // Deletion + if err := s.Update(nil, nil); err == nil { + t.Fatal("expected error") + } + if err := s.Update(nil, []byte{}); err == nil { + t.Fatal("expected error") + } + if err := s.Update([]byte{0xa}, []byte{}); err == nil { + t.Fatal("expected error") + } + // Non-ascending keys 
(going backwards or repeating) + assert.Nil(t, s.Update([]byte{0xaa}, []byte{0xa})) + assert.NotNil(t, s.Update([]byte{0xaa}, []byte{0xa}), "repeat insert same key") + assert.NotNil(t, s.Update([]byte{0xaa}, []byte{0xb}), "repeat insert same key") + assert.Nil(t, s.Update([]byte{0xab}, []byte{0xa})) + assert.NotNil(t, s.Update([]byte{0x10}, []byte{0xb}), "out of order insert") + assert.NotNil(t, s.Update([]byte{0xaa}, []byte{0xb}), "repeat insert same key") +} diff --git a/trie_by_cid/trie/testutil/utils.go b/trie_by_cid/trie/testutil/utils.go new file mode 100644 index 0000000..ac6e3b5 --- /dev/null +++ b/trie_by_cid/trie/testutil/utils.go @@ -0,0 +1,62 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package testutil + +import ( + crand "crypto/rand" + "encoding/binary" + mrand "math/rand" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/trienode" +) + +// Prng is a pseudo random number generator seeded by strong randomness. +// The randomness is printed on startup in order to make failures reproducible. 
+var prng = initRand() + +func initRand() *mrand.Rand { + var seed [8]byte + crand.Read(seed[:]) + rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:])))) + return rnd +} + +// RandBytes generates a random byte slice with specified length. +func RandBytes(n int) []byte { + r := make([]byte, n) + prng.Read(r) + return r +} + +// RandomHash generates a random blob of data and returns it as a hash. +func RandomHash() common.Hash { + return common.BytesToHash(RandBytes(common.HashLength)) +} + +// RandomAddress generates a random blob of data and returns it as an address. +func RandomAddress() common.Address { + return common.BytesToAddress(RandBytes(common.AddressLength)) +} + +// RandomNode generates a random node. +func RandomNode() *trienode.Node { + val := RandBytes(100) + return trienode.New(crypto.Keccak256Hash(val), val) +} diff --git a/trie_by_cid/trie/tracer_test.go b/trie_by_cid/trie/tracer_test.go new file mode 100644 index 0000000..7fee07a --- /dev/null +++ b/trie_by_cid/trie/tracer_test.go @@ -0,0 +1,377 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package trie + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/trienode" +) + +var ( + tiny = []struct{ k, v string }{ + {"k1", "v1"}, + {"k2", "v2"}, + {"k3", "v3"}, + } + nonAligned = []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + standard = []struct{ k, v string }{ + {string(randBytes(32)), "verb"}, + {string(randBytes(32)), "wookiedoo"}, + {string(randBytes(32)), "stallion"}, + {string(randBytes(32)), "horse"}, + {string(randBytes(32)), "coin"}, + {string(randBytes(32)), "puppy"}, + {string(randBytes(32)), "myothernodedata"}, + } +) + +func TestTrieTracer(t *testing.T) { + testTrieTracer(t, tiny) + testTrieTracer(t, nonAligned) + testTrieTracer(t, standard) +} + +// Tests if the trie diffs are tracked correctly. Tracer should capture +// all non-leaf dirty nodes, no matter the node is embedded or not. 
+func testTrieTracer(t *testing.T, vals []struct{ k, v string }) { + db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + trie := NewEmpty(db) + + // Determine all new nodes are tracked + for _, val := range vals { + trie.MustUpdate([]byte(val.k), []byte(val.v)) + } + insertSet := copySet(trie.tracer.inserts) // copy before commit + deleteSet := copySet(trie.tracer.deletes) // copy before commit + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) + + seen := setKeys(iterNodes(db, root)) + if !compareSet(insertSet, seen) { + t.Fatal("Unexpected insertion set") + } + if !compareSet(deleteSet, nil) { + t.Fatal("Unexpected deletion set") + } + + // Determine all deletions are tracked + trie, _ = New(TrieID(root), db) + for _, val := range vals { + trie.MustDelete([]byte(val.k)) + } + insertSet, deleteSet = copySet(trie.tracer.inserts), copySet(trie.tracer.deletes) + if !compareSet(insertSet, nil) { + t.Fatal("Unexpected insertion set") + } + if !compareSet(deleteSet, seen) { + t.Fatal("Unexpected deletion set") + } +} + +// Test that after inserting a new batch of nodes and deleting them immediately, +// the trie tracer should be cleared normally as no operation happened. +func TestTrieTracerNoop(t *testing.T) { + testTrieTracerNoop(t, tiny) + testTrieTracerNoop(t, nonAligned) + testTrieTracerNoop(t, standard) +} + +func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) { + db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + trie := NewEmpty(db) + for _, val := range vals { + trie.MustUpdate([]byte(val.k), []byte(val.v)) + } + for _, val := range vals { + trie.MustDelete([]byte(val.k)) + } + if len(trie.tracer.inserts) != 0 { + t.Fatal("Unexpected insertion set") + } + if len(trie.tracer.deletes) != 0 { + t.Fatal("Unexpected deletion set") + } +} + +// Tests if the accessList is correctly tracked. 
+func TestAccessList(t *testing.T) { + testAccessList(t, tiny) + testAccessList(t, nonAligned) + testAccessList(t, standard) +} + +func testAccessList(t *testing.T, vals []struct{ k, v string }) { + var ( + db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + trie = NewEmpty(db) + orig = trie.Copy() + ) + // Create trie from scratch + for _, val := range vals { + trie.MustUpdate([]byte(val.k), []byte(val.v)) + } + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Update trie + parent := root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + for _, val := range vals { + trie.MustUpdate([]byte(val.k), randBytes(32)) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Add more new nodes + parent = root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + var keys []string + for i := 0; i < 30; i++ { + key := randBytes(32) + keys = append(keys, string(key)) + trie.MustUpdate(key, randBytes(32)) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Partial deletions + parent = root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + for _, key := range keys { + trie.MustUpdate([]byte(key), nil) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } + + // Delete 
all + parent = root + trie, _ = New(TrieID(root), db) + orig = trie.Copy() + for _, val := range vals { + trie.MustUpdate([]byte(val.k), nil) + } + root, nodes, _ = trie.Commit(false) + db.Update(root, parent, trienode.NewWithNodeSet(nodes)) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, nodes); err != nil { + t.Fatalf("Invalid accessList %v", err) + } +} + +// Tests origin values won't be tracked in Iterator or Prover +func TestAccessListLeak(t *testing.T) { + var ( + db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + trie = NewEmpty(db) + ) + // Create trie from scratch + for _, val := range standard { + trie.MustUpdate([]byte(val.k), []byte(val.v)) + } + root, nodes, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes)) + + var cases = []struct { + op func(tr *Trie) + }{ + { + func(tr *Trie) { + it := tr.MustNodeIterator(nil) + for it.Next(true) { + } + }, + }, + { + func(tr *Trie) { + it := NewIterator(tr.MustNodeIterator(nil)) + for it.Next() { + } + }, + }, + { + func(tr *Trie) { + for _, val := range standard { + tr.Prove([]byte(val.k), rawdb.NewMemoryDatabase()) + } + }, + }, + } + for _, c := range cases { + trie, _ = New(TrieID(root), db) + n1 := len(trie.tracer.accessList) + c.op(trie) + n2 := len(trie.tracer.accessList) + + if n1 != n2 { + t.Fatalf("AccessList is leaked, prev %d after %d", n1, n2) + } + } +} + +// Tests whether the original tree node is correctly deleted after being embedded +// in its parent due to the smaller size of the original tree node. 
+func TestTinyTree(t *testing.T) { + var ( + db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme) + trie = NewEmpty(db) + ) + for _, val := range tiny { + trie.MustUpdate([]byte(val.k), randBytes(32)) + } + root, set, _ := trie.Commit(false) + db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(set)) + + parent := root + trie, _ = New(TrieID(root), db) + orig := trie.Copy() + for _, val := range tiny { + trie.MustUpdate([]byte(val.k), []byte(val.v)) + } + root, set, _ = trie.Commit(false) + db.Update(root, parent, trienode.NewWithNodeSet(set)) + + trie, _ = New(TrieID(root), db) + if err := verifyAccessList(orig, trie, set); err != nil { + t.Fatalf("Invalid accessList %v", err) + } +} + +func compareSet(setA, setB map[string]struct{}) bool { + if len(setA) != len(setB) { + return false + } + for key := range setA { + if _, ok := setB[key]; !ok { + return false + } + } + return true +} + +func forNodes(tr *Trie) map[string][]byte { + var ( + it = tr.MustNodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Leaf() { + continue + } + nodes[string(it.Path())] = common.CopyBytes(it.NodeBlob()) + } + return nodes +} + +func iterNodes(db *testDb, root common.Hash) map[string][]byte { + tr, _ := New(TrieID(root), db) + return forNodes(tr) +} + +func forHashedNodes(tr *Trie) map[string][]byte { + var ( + it = tr.MustNodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Hash() == (common.Hash{}) { + continue + } + nodes[string(it.Path())] = common.CopyBytes(it.NodeBlob()) + } + return nodes +} + +func diffTries(trieA, trieB *Trie) (map[string][]byte, map[string][]byte, map[string][]byte) { + var ( + nodesA = forHashedNodes(trieA) + nodesB = forHashedNodes(trieB) + inA = make(map[string][]byte) // hashed nodes in trie a but not b + inB = make(map[string][]byte) // hashed nodes in trie b but not a + both = make(map[string][]byte) // hashed nodes in both tries but different value + ) + for 
path, blobA := range nodesA { + if blobB, ok := nodesB[path]; ok { + if bytes.Equal(blobA, blobB) { + continue + } + both[path] = blobA + continue + } + inA[path] = blobA + } + for path, blobB := range nodesB { + if _, ok := nodesA[path]; ok { + continue + } + inB[path] = blobB + } + return inA, inB, both +} + +func setKeys(set map[string][]byte) map[string]struct{} { + keys := make(map[string]struct{}) + for k := range set { + keys[k] = struct{}{} + } + return keys +} + +func copySet(set map[string]struct{}) map[string]struct{} { + copied := make(map[string]struct{}) + for k := range set { + copied[k] = struct{}{} + } + return copied +} diff --git a/trie_by_cid/trie/trienode/node.go b/trie_by_cid/trie/trienode/node.go new file mode 100644 index 0000000..95315c2 --- /dev/null +++ b/trie_by_cid/trie/trienode/node.go @@ -0,0 +1,199 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package trienode + +import ( + "fmt" + "sort" + "strings" + + "github.com/ethereum/go-ethereum/common" +) + +// Node is a wrapper which contains the encoded blob of the trie node and its +// node hash. It is general enough that can be used to represent trie node +// corresponding to different trie implementations. 
+type Node struct { + Hash common.Hash // Node hash, empty for deleted node + Blob []byte // Encoded node blob, nil for the deleted node +} + +// Size returns the total memory size used by this node. +func (n *Node) Size() int { + return len(n.Blob) + common.HashLength +} + +// IsDeleted returns the indicator if the node is marked as deleted. +func (n *Node) IsDeleted() bool { + return len(n.Blob) == 0 +} + +// New constructs a node with provided node information. +func New(hash common.Hash, blob []byte) *Node { + return &Node{Hash: hash, Blob: blob} +} + +// NewDeleted constructs a node which is deleted. +func NewDeleted() *Node { return New(common.Hash{}, nil) } + +// leaf represents a trie leaf node +type leaf struct { + Blob []byte // raw blob of leaf + Parent common.Hash // the hash of parent node +} + +// NodeSet contains a set of nodes collected during the commit operation. +// Each node is keyed by path. It's not thread-safe to use. +type NodeSet struct { + Owner common.Hash + Leaves []*leaf + Nodes map[string]*Node + updates int // the count of updated and inserted nodes + deletes int // the count of deleted nodes +} + +// NewNodeSet initializes a node set. The owner is zero for the account trie and +// the owning account address hash for storage tries. +func NewNodeSet(owner common.Hash) *NodeSet { + return &NodeSet{ + Owner: owner, + Nodes: make(map[string]*Node), + } +} + +// ForEachWithOrder iterates the nodes with the order from bottom to top, +// right to left, nodes with the longest path will be iterated first. +func (set *NodeSet) ForEachWithOrder(callback func(path string, n *Node)) { + var paths []string + for path := range set.Nodes { + paths = append(paths, path) + } + // Bottom-up, the longest path first + sort.Sort(sort.Reverse(sort.StringSlice(paths))) + for _, path := range paths { + callback(path, set.Nodes[path]) + } +} + +// AddNode adds the provided node into set. 
+func (set *NodeSet) AddNode(path []byte, n *Node) { + if n.IsDeleted() { + set.deletes += 1 + } else { + set.updates += 1 + } + set.Nodes[string(path)] = n +} + +// Merge adds a set of nodes into the set. +func (set *NodeSet) Merge(owner common.Hash, nodes map[string]*Node) error { + if set.Owner != owner { + return fmt.Errorf("nodesets belong to different owner are not mergeable %x-%x", set.Owner, owner) + } + for path, node := range nodes { + prev, ok := set.Nodes[path] + if ok { + // overwrite happens, revoke the counter + if prev.IsDeleted() { + set.deletes -= 1 + } else { + set.updates -= 1 + } + } + set.AddNode([]byte(path), node) + } + return nil +} + +// AddLeaf adds the provided leaf node into set. TODO(rjl493456442) how can +// we get rid of it? +func (set *NodeSet) AddLeaf(parent common.Hash, blob []byte) { + set.Leaves = append(set.Leaves, &leaf{Blob: blob, Parent: parent}) +} + +// Size returns the number of dirty nodes in set. +func (set *NodeSet) Size() (int, int) { + return set.updates, set.deletes +} + +// Hashes returns the hashes of all updated nodes. TODO(rjl493456442) how can +// we get rid of it? +func (set *NodeSet) Hashes() []common.Hash { + var ret []common.Hash + for _, node := range set.Nodes { + ret = append(ret, node.Hash) + } + return ret +} + +// Summary returns a string-representation of the NodeSet. +func (set *NodeSet) Summary() string { + var out = new(strings.Builder) + fmt.Fprintf(out, "nodeset owner: %v\n", set.Owner) + if set.Nodes != nil { + for path, n := range set.Nodes { + // Deletion + if n.IsDeleted() { + fmt.Fprintf(out, " [-]: %x\n", path) + continue + } + // Insertion or update + fmt.Fprintf(out, " [+/*]: %x -> %v \n", path, n.Hash) + } + } + for _, n := range set.Leaves { + fmt.Fprintf(out, "[leaf]: %v\n", n) + } + return out.String() +} + +// MergedNodeSet represents a merged node set for a group of tries. 
+type MergedNodeSet struct { + Sets map[common.Hash]*NodeSet +} + +// NewMergedNodeSet initializes an empty merged set. +func NewMergedNodeSet() *MergedNodeSet { + return &MergedNodeSet{Sets: make(map[common.Hash]*NodeSet)} +} + +// NewWithNodeSet constructs a merged nodeset with the provided single set. +func NewWithNodeSet(set *NodeSet) *MergedNodeSet { + merged := NewMergedNodeSet() + merged.Merge(set) + return merged +} + +// Merge merges the provided dirty nodes of a trie into the set. The assumption +// is held that no duplicated set belonging to the same trie will be merged twice. +func (set *MergedNodeSet) Merge(other *NodeSet) error { + subset, present := set.Sets[other.Owner] + if present { + return subset.Merge(other.Owner, other.Nodes) + } + set.Sets[other.Owner] = other + return nil +} + +// Flatten returns a two-dimensional map for internal nodes. +func (set *MergedNodeSet) Flatten() map[common.Hash]map[string]*Node { + nodes := make(map[common.Hash]map[string]*Node) + for owner, set := range set.Sets { + nodes[owner] = set.Nodes + } + return nodes +} diff --git a/trie_by_cid/trie/trienode/proof.go b/trie_by_cid/trie/trienode/proof.go new file mode 100644 index 0000000..012f008 --- /dev/null +++ b/trie_by_cid/trie/trienode/proof.go @@ -0,0 +1,162 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trienode + +import ( + "errors" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" +) + +// ProofSet stores a set of trie nodes. It implements trie.Database and can also +// act as a cache for another trie.Database. +type ProofSet struct { + nodes map[string][]byte + order []string + + dataSize int + lock sync.RWMutex +} + +// NewProofSet creates an empty node set +func NewProofSet() *ProofSet { + return &ProofSet{ + nodes: make(map[string][]byte), + } +} + +// Put stores a new node in the set +func (db *ProofSet) Put(key []byte, value []byte) error { + db.lock.Lock() + defer db.lock.Unlock() + + if _, ok := db.nodes[string(key)]; ok { + return nil + } + keystr := string(key) + + db.nodes[keystr] = common.CopyBytes(value) + db.order = append(db.order, keystr) + db.dataSize += len(value) + + return nil +} + +// Delete removes a node from the set +func (db *ProofSet) Delete(key []byte) error { + db.lock.Lock() + defer db.lock.Unlock() + + delete(db.nodes, string(key)) + return nil +} + +// Get returns a stored node +func (db *ProofSet) Get(key []byte) ([]byte, error) { + db.lock.RLock() + defer db.lock.RUnlock() + + if entry, ok := db.nodes[string(key)]; ok { + return entry, nil + } + return nil, errors.New("not found") +} + +// Has returns true if the node set contains the given key +func (db *ProofSet) Has(key []byte) (bool, error) { + _, err := db.Get(key) + return err == nil, nil +} + +// KeyCount returns the number of nodes in the set +func (db *ProofSet) KeyCount() int { + db.lock.RLock() + defer db.lock.RUnlock() + + return len(db.nodes) +} + +// DataSize returns the aggregated data size of nodes in the set +func (db *ProofSet) DataSize() int { + db.lock.RLock() + defer db.lock.RUnlock() + + 
return db.dataSize +} + +// List converts the node set to a ProofList +func (db *ProofSet) List() ProofList { + db.lock.RLock() + defer db.lock.RUnlock() + + var values ProofList + for _, key := range db.order { + values = append(values, db.nodes[key]) + } + return values +} + +// Store writes the contents of the set to the given database +func (db *ProofSet) Store(target ethdb.KeyValueWriter) { + db.lock.RLock() + defer db.lock.RUnlock() + + for key, value := range db.nodes { + target.Put([]byte(key), value) + } +} + +// ProofList stores an ordered list of trie nodes. It implements ethdb.KeyValueWriter. +type ProofList []rlp.RawValue + +// Store writes the contents of the list to the given database +func (n ProofList) Store(db ethdb.KeyValueWriter) { + for _, node := range n { + db.Put(crypto.Keccak256(node), node) + } +} + +// Set converts the node list to a ProofSet +func (n ProofList) Set() *ProofSet { + db := NewProofSet() + n.Store(db) + return db +} + +// Put stores a new node at the end of the list +func (n *ProofList) Put(key []byte, value []byte) error { + *n = append(*n, value) + return nil +} + +// Delete panics as there's no reason to remove a node from the list. +func (n *ProofList) Delete(key []byte) error { + panic("not supported") +} + +// DataSize returns the aggregated data size of nodes in the list +func (n ProofList) DataSize() int { + var size int + for _, node := range n { + size += len(node) + } + return size +} diff --git a/trie_by_cid/trie/triestate/state.go b/trie_by_cid/trie/triestate/state.go new file mode 100644 index 0000000..2928113 --- /dev/null +++ b/trie_by_cid/trie/triestate/state.go @@ -0,0 +1,277 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. 
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see http://www.gnu.org/licenses/.

package triestate

import (
	"errors"
	"fmt"
	"sync"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/rlp"
	"golang.org/x/crypto/sha3"

	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/trienode"
)

// Trie is an Ethereum state trie. It can be implemented by an Ethereum Merkle
// Patricia tree or by a Verkle tree.
type Trie interface {
	// Get returns the value for key stored in the trie.
	Get(key []byte) ([]byte, error)

	// Update associates key with value in the trie.
	Update(key, value []byte) error

	// Delete removes any existing value for key from the trie.
	Delete(key []byte) error

	// Commit commits the trie and returns the new root hash along with the
	// set of dirty nodes generated. Callers in this file treat a nil node
	// set as "nothing changed".
	Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet, error)
}

// TrieLoader wraps the functions used to open tries.
type TrieLoader interface {
	// OpenTrie opens the main account trie.
	OpenTrie(root common.Hash) (Trie, error)

	// OpenStorageTrie opens the storage trie of an account.
	OpenStorageTrie(stateRoot common.Hash, addrHash, root common.Hash) (Trie, error)
}

// Set represents a collection of mutated states during a state transition.
+// The value refers to the original content of state before the transition +// is made. Nil means that the state was not present previously. +type Set struct { + Accounts map[common.Address][]byte // Mutated account set, nil means the account was not present + Storages map[common.Address]map[common.Hash][]byte // Mutated storage set, nil means the slot was not present + Incomplete map[common.Address]struct{} // Indicator whether the storage is incomplete due to large deletion + size common.StorageSize // Approximate size of set +} + +// New constructs the state set with provided data. +func New(accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte, incomplete map[common.Address]struct{}) *Set { + return &Set{ + Accounts: accounts, + Storages: storages, + Incomplete: incomplete, + } +} + +// Size returns the approximate memory size occupied by the set. +func (s *Set) Size() common.StorageSize { + if s.size != 0 { + return s.size + } + for _, account := range s.Accounts { + s.size += common.StorageSize(common.AddressLength + len(account)) + } + for _, slots := range s.Storages { + for _, val := range slots { + s.size += common.StorageSize(common.HashLength + len(val)) + } + s.size += common.StorageSize(common.AddressLength) + } + s.size += common.StorageSize(common.AddressLength * len(s.Incomplete)) + return s.size +} + +// context wraps all fields for executing state diffs. +type context struct { + prevRoot common.Hash + postRoot common.Hash + accounts map[common.Address][]byte + storages map[common.Address]map[common.Hash][]byte + accountTrie Trie + nodes *trienode.MergedNodeSet +} + +// Apply traverses the provided state diffs, apply them in the associated +// post-state and return the generated dirty trie nodes. The state can be +// loaded via the provided trie loader. 
+func Apply(prevRoot common.Hash, postRoot common.Hash, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte, loader TrieLoader) (map[common.Hash]map[string]*trienode.Node, error) { + tr, err := loader.OpenTrie(postRoot) + if err != nil { + return nil, err + } + ctx := &context{ + prevRoot: prevRoot, + postRoot: postRoot, + accounts: accounts, + storages: storages, + accountTrie: tr, + nodes: trienode.NewMergedNodeSet(), + } + for addr, account := range accounts { + var err error + if len(account) == 0 { + err = deleteAccount(ctx, loader, addr) + } else { + err = updateAccount(ctx, loader, addr) + } + if err != nil { + return nil, fmt.Errorf("failed to revert state, err: %w", err) + } + } + root, result, err := tr.Commit(false) + if err != nil { + return nil, err + } + if root != prevRoot { + return nil, fmt.Errorf("failed to revert state, want %#x, got %#x", prevRoot, root) + } + if err := ctx.nodes.Merge(result); err != nil { + return nil, err + } + return ctx.nodes.Flatten(), nil +} + +// updateAccount the account was present in prev-state, and may or may not +// existent in post-state. Apply the reverse diff and verify if the storage +// root matches the one in prev-state account. +func updateAccount(ctx *context, loader TrieLoader, addr common.Address) error { + // The account was present in prev-state, decode it from the + // 'slim-rlp' format bytes. + h := newHasher() + defer h.release() + + addrHash := h.hash(addr.Bytes()) + prev, err := types.FullAccount(ctx.accounts[addr]) + if err != nil { + return err + } + // The account may or may not existent in post-state, try to + // load it and decode if it's found. + blob, err := ctx.accountTrie.Get(addrHash.Bytes()) + if err != nil { + return err + } + post := types.NewEmptyStateAccount() + if len(blob) != 0 { + if err := rlp.DecodeBytes(blob, &post); err != nil { + return err + } + } + // Apply all storage changes into the post-state storage trie. 
+ st, err := loader.OpenStorageTrie(ctx.postRoot, addrHash, post.Root) + if err != nil { + return err + } + for key, val := range ctx.storages[addr] { + var err error + if len(val) == 0 { + err = st.Delete(key.Bytes()) + } else { + err = st.Update(key.Bytes(), val) + } + if err != nil { + return err + } + } + root, result, err := st.Commit(false) + if err != nil { + return err + } + if root != prev.Root { + return errors.New("failed to reset storage trie") + } + // The returned set can be nil if storage trie is not changed + // at all. + if result != nil { + if err := ctx.nodes.Merge(result); err != nil { + return err + } + } + // Write the prev-state account into the main trie + full, err := rlp.EncodeToBytes(prev) + if err != nil { + return err + } + return ctx.accountTrie.Update(addrHash.Bytes(), full) +} + +// deleteAccount the account was not present in prev-state, and is expected +// to be existent in post-state. Apply the reverse diff and verify if the +// account and storage is wiped out correctly. +func deleteAccount(ctx *context, loader TrieLoader, addr common.Address) error { + // The account must be existent in post-state, load the account. 
+ h := newHasher() + defer h.release() + + addrHash := h.hash(addr.Bytes()) + blob, err := ctx.accountTrie.Get(addrHash.Bytes()) + if err != nil { + return err + } + if len(blob) == 0 { + return fmt.Errorf("account is non-existent %#x", addrHash) + } + var post types.StateAccount + if err := rlp.DecodeBytes(blob, &post); err != nil { + return err + } + st, err := loader.OpenStorageTrie(ctx.postRoot, addrHash, post.Root) + if err != nil { + return err + } + for key, val := range ctx.storages[addr] { + if len(val) != 0 { + return errors.New("expect storage deletion") + } + if err := st.Delete(key.Bytes()); err != nil { + return err + } + } + root, result, err := st.Commit(false) + if err != nil { + return err + } + if root != types.EmptyRootHash { + return errors.New("failed to clear storage trie") + } + // The returned set can be nil if storage trie is not changed + // at all. + if result != nil { + if err := ctx.nodes.Merge(result); err != nil { + return err + } + } + // Delete the post-state account from the main trie. + return ctx.accountTrie.Delete(addrHash.Bytes()) +} + +// hasher is used to compute the sha256 hash of the provided data. +type hasher struct{ sha crypto.KeccakState } + +var hasherPool = sync.Pool{ + New: func() interface{} { return &hasher{sha: sha3.NewLegacyKeccak256().(crypto.KeccakState)} }, +} + +func newHasher() *hasher { + return hasherPool.Get().(*hasher) +} + +func (h *hasher) hash(data []byte) common.Hash { + return crypto.HashData(h.sha, data) +} + +func (h *hasher) release() { + hasherPool.Put(h) +} diff --git a/trie_by_cid/trie/utils/verkle.go b/trie_by_cid/trie/utils/verkle.go new file mode 100644 index 0000000..ce059ed --- /dev/null +++ b/trie_by_cid/trie/utils/verkle.go @@ -0,0 +1,342 @@ +// Copyright 2023 go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package utils + +import ( + "encoding/binary" + "sync" + + "github.com/crate-crypto/go-ipa/bandersnatch/fr" + "github.com/ethereum/go-ethereum/common/lru" + "github.com/ethereum/go-ethereum/metrics" + "github.com/gballet/go-verkle" + "github.com/holiman/uint256" +) + +const ( + // The spec of verkle key encoding can be found here. + // https://notes.ethereum.org/@vbuterin/verkle_tree_eip#Tree-embedding + VersionLeafKey = 0 + BalanceLeafKey = 1 + NonceLeafKey = 2 + CodeKeccakLeafKey = 3 + CodeSizeLeafKey = 4 +) + +var ( + zero = uint256.NewInt(0) + verkleNodeWidthLog2 = 8 + headerStorageOffset = uint256.NewInt(64) + mainStorageOffsetLshVerkleNodeWidth = new(uint256.Int).Lsh(uint256.NewInt(256), 31-uint(verkleNodeWidthLog2)) + codeOffset = uint256.NewInt(128) + verkleNodeWidth = uint256.NewInt(256) + codeStorageDelta = uint256.NewInt(0).Sub(codeOffset, headerStorageOffset) + + index0Point *verkle.Point // pre-computed commitment of polynomial [2+256*64] + + // cacheHitGauge is the metric to track how many cache hit occurred. + cacheHitGauge = metrics.NewRegisteredGauge("trie/verkle/cache/hit", nil) + + // cacheMissGauge is the metric to track how many cache miss occurred. 
+ cacheMissGauge = metrics.NewRegisteredGauge("trie/verkle/cache/miss", nil) +) + +func init() { + // The byte array is the Marshalled output of the point computed as such: + // + // var ( + // config = verkle.GetConfig() + // fr verkle.Fr + // ) + // verkle.FromLEBytes(&fr, []byte{2, 64}) + // point := config.CommitToPoly([]verkle.Fr{fr}, 1) + index0Point = new(verkle.Point) + err := index0Point.SetBytes([]byte{34, 25, 109, 242, 193, 5, 144, 224, 76, 52, 189, 92, 197, 126, 9, 145, 27, 152, 199, 130, 165, 3, 210, 27, 193, 131, 142, 28, 110, 26, 16, 191}) + if err != nil { + panic(err) + } +} + +// PointCache is the LRU cache for storing evaluated address commitment. +type PointCache struct { + lru lru.BasicLRU[string, *verkle.Point] + lock sync.RWMutex +} + +// NewPointCache returns the cache with specified size. +func NewPointCache(maxItems int) *PointCache { + return &PointCache{ + lru: lru.NewBasicLRU[string, *verkle.Point](maxItems), + } +} + +// Get returns the cached commitment for the specified address, or computing +// it on the flight. +func (c *PointCache) Get(addr []byte) *verkle.Point { + c.lock.Lock() + defer c.lock.Unlock() + + p, ok := c.lru.Get(string(addr)) + if ok { + cacheHitGauge.Inc(1) + return p + } + cacheMissGauge.Inc(1) + p = evaluateAddressPoint(addr) + c.lru.Add(string(addr), p) + return p +} + +// GetStem returns the first 31 bytes of the tree key as the tree stem. It only +// works for the account metadata whose treeIndex is 0. +func (c *PointCache) GetStem(addr []byte) []byte { + p := c.Get(addr) + return pointToHash(p, 0)[:31] +} + +// GetTreeKey performs both the work of the spec's get_tree_key function, and that +// of pedersen_hash: it builds the polynomial in pedersen_hash without having to +// create a mostly zero-filled buffer and "type cast" it to a 128-long 16-byte +// array. Since at most the first 5 coefficients of the polynomial will be non-zero, +// these 5 coefficients are created directly. 
func GetTreeKey(address []byte, treeIndex *uint256.Int, subIndex byte) []byte {
	// Left-pad short addresses with zero bytes up to 32 bytes. Inputs that are
	// already 32 bytes or longer are used as they are.
	if len(address) < 32 {
		var aligned [32]byte
		address = append(aligned[:32-len(address)], address...)
	}
	// poly = [2+256*64, address_le_low, address_le_high, tree_index_le_low, tree_index_le_high]
	// poly[0] is left at its zero value here; the constant term is added
	// afterwards through index0Point.
	var poly [5]fr.Element

	// 32-byte address, interpreted as two little endian
	// 16-byte numbers.
	verkle.FromLEBytes(&poly[1], address[:16])
	verkle.FromLEBytes(&poly[2], address[16:])

	// treeIndex must be interpreted as a 32-byte aligned little-endian integer.
	// e.g: if treeIndex is 0xAABBCC, we need the byte representation to be 0xCCBBAA00...00.
	// poly[3] = LE({CC,BB,AA,00...0}) (16 bytes), poly[4]=LE({00,00,...}) (16 bytes).
	//
	// To avoid unnecessary endianness conversions for go-ipa, we do some trick:
	// - poly[3]'s byte representation is the same as the *top* 16 bytes (trieIndexBytes[16:]) of
	//   32-byte aligned big-endian representation (BE({00,...,AA,BB,CC})).
	// - poly[4]'s byte representation is the same as the *low* 16 bytes (trieIndexBytes[:16]) of
	//   the 32-byte aligned big-endian representation (BE({00,00,...})).
	trieIndexBytes := treeIndex.Bytes32()
	verkle.FromBytes(&poly[3], trieIndexBytes[16:])
	verkle.FromBytes(&poly[4], trieIndexBytes[:16])

	cfg := verkle.GetConfig()
	ret := cfg.CommitToPoly(poly[:], 0)

	// add a constant point corresponding to poly[0]=[2+256*64].
	ret.Add(ret, index0Point)

	// Turn the commitment into the final tree key for the requested sub-index.
	return pointToHash(ret, subIndex)
}

// GetTreeKeyWithEvaluatedAddress is basically identical to GetTreeKey, the only
// difference is a part of polynomial is already evaluated.
//
// Specifically, poly = [2+256*64, address_le_low, address_le_high] is already
// evaluated.
+func GetTreeKeyWithEvaluatedAddress(evaluated *verkle.Point, treeIndex *uint256.Int, subIndex byte) []byte { + var poly [5]fr.Element + + poly[0].SetZero() + poly[1].SetZero() + poly[2].SetZero() + + // little-endian, 32-byte aligned treeIndex + var index [32]byte + for i := 0; i < len(treeIndex); i++ { + binary.LittleEndian.PutUint64(index[i*8:(i+1)*8], treeIndex[i]) + } + verkle.FromLEBytes(&poly[3], index[:16]) + verkle.FromLEBytes(&poly[4], index[16:]) + + cfg := verkle.GetConfig() + ret := cfg.CommitToPoly(poly[:], 0) + + // add the pre-evaluated address + ret.Add(ret, evaluated) + + return pointToHash(ret, subIndex) +} + +// VersionKey returns the verkle tree key of the version field for the specified account. +func VersionKey(address []byte) []byte { + return GetTreeKey(address, zero, VersionLeafKey) +} + +// BalanceKey returns the verkle tree key of the balance field for the specified account. +func BalanceKey(address []byte) []byte { + return GetTreeKey(address, zero, BalanceLeafKey) +} + +// NonceKey returns the verkle tree key of the nonce field for the specified account. +func NonceKey(address []byte) []byte { + return GetTreeKey(address, zero, NonceLeafKey) +} + +// CodeKeccakKey returns the verkle tree key of the code keccak field for +// the specified account. +func CodeKeccakKey(address []byte) []byte { + return GetTreeKey(address, zero, CodeKeccakLeafKey) +} + +// CodeSizeKey returns the verkle tree key of the code size field for the +// specified account. 
+func CodeSizeKey(address []byte) []byte { + return GetTreeKey(address, zero, CodeSizeLeafKey) +} + +func codeChunkIndex(chunk *uint256.Int) (*uint256.Int, byte) { + var ( + chunkOffset = new(uint256.Int).Add(codeOffset, chunk) + treeIndex = new(uint256.Int).Div(chunkOffset, verkleNodeWidth) + subIndexMod = new(uint256.Int).Mod(chunkOffset, verkleNodeWidth) + ) + var subIndex byte + if len(subIndexMod) != 0 { + subIndex = byte(subIndexMod[0]) + } + return treeIndex, subIndex +} + +// CodeChunkKey returns the verkle tree key of the code chunk for the +// specified account. +func CodeChunkKey(address []byte, chunk *uint256.Int) []byte { + treeIndex, subIndex := codeChunkIndex(chunk) + return GetTreeKey(address, treeIndex, subIndex) +} + +func storageIndex(bytes []byte) (*uint256.Int, byte) { + // If the storage slot is in the header, we need to add the header offset. + var key uint256.Int + key.SetBytes(bytes) + if key.Cmp(codeStorageDelta) < 0 { + // This addition is always safe; it can't ever overflow since pos + +package utils + +import ( + "bytes" + "testing" + + "github.com/gballet/go-verkle" + "github.com/holiman/uint256" +) + +func TestTreeKey(t *testing.T) { + var ( + address = []byte{0x01} + addressEval = evaluateAddressPoint(address) + smallIndex = uint256.NewInt(1) + largeIndex = uint256.NewInt(10000) + smallStorage = []byte{0x1} + largeStorage = bytes.Repeat([]byte{0xff}, 16) + ) + if !bytes.Equal(VersionKey(address), VersionKeyWithEvaluatedAddress(addressEval)) { + t.Fatal("Unmatched version key") + } + if !bytes.Equal(BalanceKey(address), BalanceKeyWithEvaluatedAddress(addressEval)) { + t.Fatal("Unmatched balance key") + } + if !bytes.Equal(NonceKey(address), NonceKeyWithEvaluatedAddress(addressEval)) { + t.Fatal("Unmatched nonce key") + } + if !bytes.Equal(CodeKeccakKey(address), CodeKeccakKeyWithEvaluatedAddress(addressEval)) { + t.Fatal("Unmatched code keccak key") + } + if !bytes.Equal(CodeSizeKey(address), 
CodeSizeKeyWithEvaluatedAddress(addressEval)) { + t.Fatal("Unmatched code size key") + } + if !bytes.Equal(CodeChunkKey(address, smallIndex), CodeChunkKeyWithEvaluatedAddress(addressEval, smallIndex)) { + t.Fatal("Unmatched code chunk key") + } + if !bytes.Equal(CodeChunkKey(address, largeIndex), CodeChunkKeyWithEvaluatedAddress(addressEval, largeIndex)) { + t.Fatal("Unmatched code chunk key") + } + if !bytes.Equal(StorageSlotKey(address, smallStorage), StorageSlotKeyWithEvaluatedAddress(addressEval, smallStorage)) { + t.Fatal("Unmatched storage slot key") + } + if !bytes.Equal(StorageSlotKey(address, largeStorage), StorageSlotKeyWithEvaluatedAddress(addressEval, largeStorage)) { + t.Fatal("Unmatched storage slot key") + } +} + +// goos: darwin +// goarch: amd64 +// pkg: github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/utils +// cpu: VirtualApple @ 2.50GHz +// BenchmarkTreeKey +// BenchmarkTreeKey-8 398731 2961 ns/op 32 B/op 1 allocs/op +func BenchmarkTreeKey(b *testing.B) { + // Initialize the IPA settings which can be pretty expensive. + verkle.GetConfig() + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + BalanceKey([]byte{0x01}) + } +} + +// goos: darwin +// goarch: amd64 +// pkg: github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/utils +// cpu: VirtualApple @ 2.50GHz +// BenchmarkTreeKeyWithEvaluation +// BenchmarkTreeKeyWithEvaluation-8 513855 2324 ns/op 32 B/op 1 allocs/op +func BenchmarkTreeKeyWithEvaluation(b *testing.B) { + // Initialize the IPA settings which can be pretty expensive. 
+ verkle.GetConfig() + + addr := []byte{0x01} + eval := evaluateAddressPoint(addr) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + BalanceKeyWithEvaluatedAddress(eval) + } +} + +// goos: darwin +// goarch: amd64 +// pkg: github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/utils +// cpu: VirtualApple @ 2.50GHz +// BenchmarkStorageKey +// BenchmarkStorageKey-8 230516 4584 ns/op 96 B/op 3 allocs/op +func BenchmarkStorageKey(b *testing.B) { + // Initialize the IPA settings which can be pretty expensive. + verkle.GetConfig() + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + StorageSlotKey([]byte{0x01}, bytes.Repeat([]byte{0xff}, 32)) + } +} + +// goos: darwin +// goarch: amd64 +// pkg: github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/utils +// cpu: VirtualApple @ 2.50GHz +// BenchmarkStorageKeyWithEvaluation +// BenchmarkStorageKeyWithEvaluation-8 320125 3753 ns/op 96 B/op 3 allocs/op +func BenchmarkStorageKeyWithEvaluation(b *testing.B) { + // Initialize the IPA settings which can be pretty expensive. + verkle.GetConfig() + + addr := []byte{0x01} + eval := evaluateAddressPoint(addr) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + StorageSlotKeyWithEvaluatedAddress(eval, bytes.Repeat([]byte{0xff}, 32)) + } +} diff --git a/trie_by_cid/trie/verkle.go b/trie_by_cid/trie/verkle.go new file mode 100644 index 0000000..b6296d3 --- /dev/null +++ b/trie_by_cid/trie/verkle.go @@ -0,0 +1,373 @@ +// Copyright 2023 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "encoding/binary" + "errors" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/gballet/go-verkle" + "github.com/holiman/uint256" + + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/trienode" + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/utils" + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/triedb/database" +) + +var ( + zero [32]byte + errInvalidRootType = errors.New("invalid node type for root") +) + +// VerkleTrie is a wrapper around VerkleNode that implements the trie.Trie +// interface so that Verkle trees can be reused verbatim. +type VerkleTrie struct { + root verkle.VerkleNode + cache *utils.PointCache + reader *trieReader +} + +// NewVerkleTrie constructs a verkle tree based on the specified root hash. +func NewVerkleTrie(root common.Hash, db database.Database, cache *utils.PointCache) (*VerkleTrie, error) { + reader, err := newTrieReader(root, common.Hash{}, db) + if err != nil { + return nil, err + } + // Parse the root verkle node if it's not empty. + node := verkle.New() + if root != types.EmptyVerkleHash && root != types.EmptyRootHash { + blob, err := reader.node(nil, common.Hash{}) + if err != nil { + return nil, err + } + node, err = verkle.ParseNode(blob, 0) + if err != nil { + return nil, err + } + } + return &VerkleTrie{ + root: node, + cache: cache, + reader: reader, + }, nil +} + +// GetKey returns the sha3 preimage of a hashed key that was previously used +// to store a value. 
+func (t *VerkleTrie) GetKey(key []byte) []byte { + return key +} + +// GetAccount implements state.Trie, retrieving the account with the specified +// account address. If the specified account is not in the verkle tree, nil will +// be returned. If the tree is corrupted, an error will be returned. +func (t *VerkleTrie) GetAccount(addr common.Address) (*types.StateAccount, error) { + var ( + acc = &types.StateAccount{} + values [][]byte + err error + ) + switch n := t.root.(type) { + case *verkle.InternalNode: + values, err = n.GetValuesAtStem(t.cache.GetStem(addr[:]), t.nodeResolver) + if err != nil { + return nil, fmt.Errorf("GetAccount (%x) error: %v", addr, err) + } + default: + return nil, errInvalidRootType + } + if values == nil { + return nil, nil + } + // Decode nonce in little-endian + if len(values[utils.NonceLeafKey]) > 0 { + acc.Nonce = binary.LittleEndian.Uint64(values[utils.NonceLeafKey]) + } + // Decode balance in little-endian + var balance [32]byte + copy(balance[:], values[utils.BalanceLeafKey]) + for i := 0; i < len(balance)/2; i++ { + balance[len(balance)-i-1], balance[i] = balance[i], balance[len(balance)-i-1] + } + acc.Balance = new(uint256.Int).SetBytes32(balance[:]) + + // Decode codehash + acc.CodeHash = values[utils.CodeKeccakLeafKey] + + // TODO account.Root is leave as empty. How should we handle the legacy account? + return acc, nil +} + +// GetStorage implements state.Trie, retrieving the storage slot with the specified +// account address and storage key. If the specified slot is not in the verkle tree, +// nil will be returned. If the tree is corrupted, an error will be returned. 
+func (t *VerkleTrie) GetStorage(addr common.Address, key []byte) ([]byte, error) { + k := utils.StorageSlotKeyWithEvaluatedAddress(t.cache.Get(addr.Bytes()), key) + val, err := t.root.Get(k, t.nodeResolver) + if err != nil { + return nil, err + } + return common.TrimLeftZeroes(val), nil +} + +// UpdateAccount implements state.Trie, writing the provided account into the tree. +// If the tree is corrupted, an error will be returned. +func (t *VerkleTrie) UpdateAccount(addr common.Address, acc *types.StateAccount) error { + var ( + err error + nonce, balance [32]byte + values = make([][]byte, verkle.NodeWidth) + ) + values[utils.VersionLeafKey] = zero[:] + values[utils.CodeKeccakLeafKey] = acc.CodeHash[:] + + // Encode nonce in little-endian + binary.LittleEndian.PutUint64(nonce[:], acc.Nonce) + values[utils.NonceLeafKey] = nonce[:] + + // Encode balance in little-endian + bytes := acc.Balance.Bytes() + if len(bytes) > 0 { + for i, b := range bytes { + balance[len(bytes)-i-1] = b + } + } + values[utils.BalanceLeafKey] = balance[:] + + switch n := t.root.(type) { + case *verkle.InternalNode: + err = n.InsertValuesAtStem(t.cache.GetStem(addr[:]), values, t.nodeResolver) + if err != nil { + return fmt.Errorf("UpdateAccount (%x) error: %v", addr, err) + } + default: + return errInvalidRootType + } + // TODO figure out if the code size needs to be updated, too + return nil +} + +// UpdateStorage implements state.Trie, writing the provided storage slot into +// the tree. If the tree is corrupted, an error will be returned. +func (t *VerkleTrie) UpdateStorage(address common.Address, key, value []byte) error { + // Left padding the slot value to 32 bytes. 
+ var v [32]byte + if len(value) >= 32 { + copy(v[:], value[:32]) + } else { + copy(v[32-len(value):], value[:]) + } + k := utils.StorageSlotKeyWithEvaluatedAddress(t.cache.Get(address.Bytes()), key) + return t.root.Insert(k, v[:], t.nodeResolver) +} + +// DeleteAccount implements state.Trie, deleting the specified account from the +// trie. If the account was not existent in the trie, no error will be returned. +// If the trie is corrupted, an error will be returned. +func (t *VerkleTrie) DeleteAccount(addr common.Address) error { + var ( + err error + values = make([][]byte, verkle.NodeWidth) + ) + for i := 0; i < verkle.NodeWidth; i++ { + values[i] = zero[:] + } + switch n := t.root.(type) { + case *verkle.InternalNode: + err = n.InsertValuesAtStem(t.cache.GetStem(addr.Bytes()), values, t.nodeResolver) + if err != nil { + return fmt.Errorf("DeleteAccount (%x) error: %v", addr, err) + } + default: + return errInvalidRootType + } + return nil +} + +// DeleteStorage implements state.Trie, deleting the specified storage slot from +// the trie. If the storage slot was not existent in the trie, no error will be +// returned. If the trie is corrupted, an error will be returned. +func (t *VerkleTrie) DeleteStorage(addr common.Address, key []byte) error { + var zero [32]byte + k := utils.StorageSlotKeyWithEvaluatedAddress(t.cache.Get(addr.Bytes()), key) + return t.root.Insert(k, zero[:], t.nodeResolver) +} + +// Hash returns the root hash of the tree. It does not write to the database and +// can be used even if the tree doesn't have one. +func (t *VerkleTrie) Hash() common.Hash { + return t.root.Commit().Bytes() +} + +// Commit writes all nodes to the tree's memory database. 
+func (t *VerkleTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet, error) { + root, ok := t.root.(*verkle.InternalNode) + if !ok { + return common.Hash{}, nil, errors.New("unexpected root node type") + } + nodes, err := root.BatchSerialize() + if err != nil { + return common.Hash{}, nil, fmt.Errorf("serializing tree nodes: %s", err) + } + nodeset := trienode.NewNodeSet(common.Hash{}) + for _, node := range nodes { + // hash parameter is not used in pathdb + nodeset.AddNode(node.Path, trienode.New(common.Hash{}, node.SerializedBytes)) + } + // Serialize root commitment form + return t.Hash(), nodeset, nil +} + +// NodeIterator implements state.Trie, returning an iterator that returns +// nodes of the trie. Iteration starts at the key after the given start key. +// +// TODO(gballet, rjl493456442) implement it. +func (t *VerkleTrie) NodeIterator(startKey []byte) (NodeIterator, error) { + panic("not implemented") +} + +// Prove implements state.Trie, constructing a Merkle proof for key. The result +// contains all encoded nodes on the path to the value at key. The value itself +// is also included in the last node and can be retrieved by verifying the proof. +// +// If the trie does not contain a value for key, the returned proof contains all +// nodes of the longest existing prefix of the key (at least the root), ending +// with the node that proves the absence of the key. +// +// TODO(gballet, rjl493456442) implement it. +func (t *VerkleTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error { + panic("not implemented") +} + +// Copy returns a deep-copied verkle tree. +func (t *VerkleTrie) Copy() *VerkleTrie { + return &VerkleTrie{ + root: t.root.Copy(), + cache: t.cache, + reader: t.reader, + } +} + +// IsVerkle indicates if the trie is a Verkle trie. +func (t *VerkleTrie) IsVerkle() bool { + return true +} + +// ChunkedCode represents a sequence of 32-bytes chunks of code (31 bytes of which +// are actual code, and 1 byte is the pushdata offset). 
type ChunkedCode []byte

// Copy the values here so as to avoid an import cycle
const (
	PUSH1  = byte(0x60)
	PUSH32 = byte(0x7f)
)

// ChunkifyCode splits EVM bytecode into 32-byte chunks. Each chunk starts with
// one offset byte followed by up to 31 bytes of code; the offset byte counts
// how many of the chunk's leading code bytes are push data carried over from
// an instruction that started in an earlier chunk. A trailing partial chunk is
// zero padded. Empty code yields an empty result.
func ChunkifyCode(code []byte) ChunkedCode {
	const (
		payload = 31 // code bytes carried by each chunk
		stride  = 32 // payload plus the leading offset byte
	)
	// Round up so that a trailing partial chunk is emitted as well.
	total := (len(code) + payload - 1) / payload
	out := make([]byte, total*stride)

	var (
		carry  int // push data bytes spilling over from the previous chunk
		cursor int // position in code of the next opcode to inspect
	)
	for n := 0; n < total; n++ {
		head := n * stride         // index of this chunk's offset byte
		limit := payload * (n + 1) // code index one past a full chunk

		// Copy 31 bytes, or fewer once the end of the code is reached.
		stop := limit
		if len(code) < stop {
			stop = len(code)
		}
		copy(out[head+1:], code[payload*n:stop])

		// Push data covering the whole chunk: mark it entirely as data and
		// let the single remaining byte spill into the following chunk.
		if carry > payload {
			out[head] = payload
			carry = 1
			continue
		}
		out[head] = byte(carry)
		carry = 0

		// Walk the opcodes of this chunk. The carry stays 0 unless a PUSH-N
		// instruction runs over the chunk boundary.
		for cursor < stop {
			if op := code[cursor]; op >= PUSH1 && op <= PUSH32 {
				cursor += int(op-PUSH1) + 1
				if cursor+1 >= limit {
					cursor++
					carry = cursor - limit
					break
				}
			}
			cursor++
		}
	}
	return out
}

// UpdateContractCode implements state.Trie, writing the provided contract code
// into the trie.
+func (t *VerkleTrie) UpdateContractCode(addr common.Address, codeHash common.Hash, code []byte) error { + var ( + chunks = ChunkifyCode(code) + values [][]byte + key []byte + err error + ) + for i, chunknr := 0, uint64(0); i < len(chunks); i, chunknr = i+32, chunknr+1 { + groupOffset := (chunknr + 128) % 256 + if groupOffset == 0 /* start of new group */ || chunknr == 0 /* first chunk in header group */ { + values = make([][]byte, verkle.NodeWidth) + key = utils.CodeChunkKeyWithEvaluatedAddress(t.cache.Get(addr.Bytes()), uint256.NewInt(chunknr)) + } + values[groupOffset] = chunks[i : i+32] + + // Reuse the calculated key to also update the code size. + if i == 0 { + cs := make([]byte, 32) + binary.LittleEndian.PutUint64(cs, uint64(len(code))) + values[utils.CodeSizeLeafKey] = cs + } + if groupOffset == 255 || len(chunks)-i <= 32 { + switch root := t.root.(type) { + case *verkle.InternalNode: + err = root.InsertValuesAtStem(key[:31], values, t.nodeResolver) + if err != nil { + return fmt.Errorf("UpdateContractCode (addr=%x) error: %w", addr[:], err) + } + default: + return errInvalidRootType + } + } + } + return nil +} + +func (t *VerkleTrie) ToDot() string { + return verkle.ToDot(t.root) +} + +func (t *VerkleTrie) nodeResolver(path []byte) ([]byte, error) { + return t.reader.node(path, common.Hash{}) +} diff --git a/trie_by_cid/trie/verkle_test.go b/trie_by_cid/trie/verkle_test.go new file mode 100644 index 0000000..904b19e --- /dev/null +++ b/trie_by_cid/trie/verkle_test.go @@ -0,0 +1,92 @@ +// Copyright 2023 go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "bytes" + "reflect" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/holiman/uint256" + + "github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie/utils" +) + +var ( + accounts = map[common.Address]*types.StateAccount{ + {1}: { + Nonce: 100, + Balance: uint256.NewInt(100), + CodeHash: common.Hash{0x1}.Bytes(), + }, + {2}: { + Nonce: 200, + Balance: uint256.NewInt(200), + CodeHash: common.Hash{0x2}.Bytes(), + }, + } + storages = map[common.Address]map[common.Hash][]byte{ + {1}: { + common.Hash{10}: []byte{10}, + common.Hash{11}: []byte{11}, + common.MaxHash: []byte{0xff}, + }, + {2}: { + common.Hash{20}: []byte{20}, + common.Hash{21}: []byte{21}, + common.MaxHash: []byte{0xff}, + }, + } +) + +func TestVerkleTreeReadWrite(t *testing.T) { + db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.PathScheme) + tr, _ := NewVerkleTrie(types.EmptyVerkleHash, db, utils.NewPointCache(100)) + + for addr, acct := range accounts { + if err := tr.UpdateAccount(addr, acct); err != nil { + t.Fatalf("Failed to update account, %v", err) + } + for key, val := range storages[addr] { + if err := tr.UpdateStorage(addr, key.Bytes(), val); err != nil { + t.Fatalf("Failed to update account, %v", err) + } + } + } + + for addr, acct := range accounts { + stored, err := tr.GetAccount(addr) + if err != nil { + t.Fatalf("Failed to get account, %v", err) + } + if !reflect.DeepEqual(stored, acct) { + t.Fatal("account is not matched") + } + for key, val := 
range storages[addr] { + stored, err := tr.GetStorage(addr, key.Bytes()) + if err != nil { + t.Fatalf("Failed to get storage, %v", err) + } + if !bytes.Equal(stored, val) { + t.Fatal("storage is not matched") + } + } + } +}