Proof tests (#17)

* Port proof tests from geth
* Scale trie size, seed rand
2023-04-24 18:04:13 +08:00 · 2023-04-24 18:04:13 +08:00 · b6ef6d4e12
commit b6ef6d4e12
parent 32e4994b09
6 changed files with 1228 additions and 489 deletions
--- a/trie_by_cid/helper/statediff_helper.go
+++ b/trie_by_cid/helper/statediff_helper.go
@@ -15,28 +15,14 @@ import (
 )

 var (
-	// ChainDB     = rawdb.NewMemoryDatabase()
 	ChainConfig = params.TestChainConfig
-	// BankFunds   = new(big.Int).Mul(big.NewInt(1e4), big.NewInt(params.Ether)) // i.e. 10,000eth

 	mockTD = big.NewInt(1)
-	// ctx    = context.Background()
-	// signer = types.NewLondonSigner(ChainConfig.ChainID)
 )

 func IndexChain(dbConfig postgres.Config, stateCache state.Database, rootA, rootB common.Hash) error {
 	_, indexer, err := indexer.NewStateDiffIndexer(
-		context.Background(),
-		ChainConfig,
-		node.Info{},
-		// node.Info{
-		// 	GenesisBlock: Genesis.Hash().String(),
-		// 	NetworkID:    "test_network",
-		// 	ID:           "test_node",
-		// 	ClientName:   "geth",
-		// 	ChainID:      ChainConfig.ChainID.Uint64(),
-		// },
-		dbConfig)
+		context.Background(), ChainConfig, node.Info{}, dbConfig)
 	if err != nil {
 		return err
 	}
@@ -50,8 +36,6 @@ func IndexChain(dbConfig postgres.Config, stateCache state.Database, rootA, root
 	args := statediff.Args{
 		OldStateRoot: rootA,
 		NewStateRoot: rootB,
-		// BlockNumber:  block.Number(),
-		// BlockHash:    block.Hash(),
 	}
 	diff, err := builder.BuildStateDiffObject(args, statediff.Params{})
 	if err != nil {
@@ -73,9 +57,4 @@ func IndexChain(dbConfig postgres.Config, stateCache state.Database, rootA, root
 		}
 	}
 	return tx.Submit(err)
-
-	// if err = tx.Submit(err); err != nil {
-	// 	return err
-	// }
-	// return nil
 }
--- a/trie_by_cid/trie/database_test.go
+++ b/trie_by_cid/trie/database_test.go
@@ -14,7 +14,7 @@
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

-package trie
+package trie_test

 import (
 	"testing"
--- a/trie_by_cid/trie/iterator_test.go
+++ b/trie_by_cid/trie/iterator_test.go
@@ -20,33 +20,17 @@ import (
 	"bytes"
 	"context"
 	"fmt"
-	"math/big"
 	"testing"
 	"time"

-	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
-	geth_state "github.com/ethereum/go-ethereum/core/state"
-	"github.com/ethereum/go-ethereum/core/types"
-	"github.com/ethereum/go-ethereum/ethdb"
-	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"
-	"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
-	"github.com/ethereum/go-ethereum/statediff/test_helpers"
 	geth_trie "github.com/ethereum/go-ethereum/trie"

 	pgipfsethdb "github.com/cerc-io/ipfs-ethdb/v5/postgres/v0"
-	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/helper"
-	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/state"
 	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie"
 )

-type kvs struct {
-	k string
-	v int64
-}
-type kvMap map[string]int64
-
 var (
 	cacheConfig = pgipfsethdb.CacheConfig{
 		Name:           "db",
@@ -70,18 +54,6 @@ var testdata1 = []kvs{
 	{"foo", 7},
 }

-var testdata2 = []kvs{
-	{"aardvark", 8},
-	{"bar", 9},
-	{"barb", 10},
-	{"bars", 11},
-	{"fab", 12},
-	{"foo", 13},
-	{"foos", 14},
-	{"food", 15},
-	{"jars", 16},
-}
-
 func TestEmptyIterator(t *testing.T) {
 	trie := trie.NewEmpty(trie.NewDatabase(rawdb.NewMemoryDatabase()))
 	iter := trie.NodeIterator(nil)
@@ -95,62 +67,6 @@ func TestEmptyIterator(t *testing.T) {
 	}
 }

-func updateTrie(tr *geth_trie.Trie, vals []kvs) (kvMap, error) {
-	all := kvMap{}
-	for _, val := range vals {
-		all[val.k] = val.v
-		acct := &types.StateAccount{
-			Balance:  big.NewInt(val.v),
-			CodeHash: test_helpers.NullCodeHash.Bytes(),
-			Root:     test_helpers.EmptyContractRoot,
-		}
-		acct_rlp, err := rlp.EncodeToBytes(acct)
-		if err != nil {
-			return nil, err
-		}
-		tr.Update([]byte(val.k), acct_rlp)
-	}
-	return all, nil
-}
-
-func commitTrie(t *testing.T, db *geth_trie.Database, tr *geth_trie.Trie) common.Hash {
-	root, nodes := tr.Commit(false)
-	if err := db.Update(geth_trie.NewWithNodeSet(nodes)); err != nil {
-		t.Fatal(err)
-	}
-	if err := db.Commit(root, false); err != nil {
-		t.Fatal(err)
-	}
-	return root
-}
-
-// commit a LevelDB state trie, index to IPLD and return new trie
-func indexTrie(t *testing.T, edb ethdb.Database, root common.Hash) *trie.Trie {
-	dbConfig.Driver = postgres.PGX
-	err := helper.IndexChain(dbConfig, geth_state.NewDatabase(edb), common.Hash{}, root)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	pg_db, err := postgres.ConnectSQLX(ctx, dbConfig)
-	if err != nil {
-		t.Fatal(err)
-	}
-	t.Cleanup(func() {
-		if err := TearDownDB(pg_db); err != nil {
-			t.Fatal(err)
-		}
-	})
-
-	ipfs_db := pgipfsethdb.NewDatabase(pg_db, makeCacheConfig(t))
-	sdb_db := state.NewDatabase(ipfs_db)
-	tr, err := trie.New(common.Hash{}, root, sdb_db.TrieDB(), ipld.MEthStateTrie)
-	if err != nil {
-		t.Fatal(err)
-	}
-	return tr
-}
-
 func TestIterator(t *testing.T) {
 	edb := rawdb.NewMemoryDatabase()
 	db := geth_trie.NewDatabase(edb)
@@ -174,19 +90,15 @@ func TestIterator(t *testing.T) {
 	found := make(map[string]int64)
 	it := trie.NewIterator(tr.NodeIterator(nil))
 	for it.Next() {
-		var acct types.StateAccount
-		if err := rlp.DecodeBytes(it.Value, &acct); err != nil {
-			t.Fatal(err)
-		}
-		found[string(it.Key)] = acct.Balance.Int64()
+		found[string(it.Key)] = unpackValue(it.Value)
 	}

 	if len(found) != len(all) {
 		t.Errorf("number of iterated values do not match: want %d, found %d", len(all), len(found))
 	}
-	for k, v := range all {
-		if found[k] != v {
-			t.Errorf("iterator value mismatch for %s: got %q want %q", k, found[k], v)
+	for k, kv := range all {
+		if found[k] != kv.v {
+			t.Errorf("iterator value mismatch for %s: got %q want %q", k, found[k], kv.v)
 		}
 	}
 }
@@ -237,7 +149,7 @@ func TestIteratorSeek(t *testing.T) {
 }

 // returns a cache config with unique name (groupcache names are global)
-func makeCacheConfig(t *testing.T) pgipfsethdb.CacheConfig {
+func makeCacheConfig(t testing.TB) pgipfsethdb.CacheConfig {
 	return pgipfsethdb.CacheConfig{
 		Name:           t.Name(),
 		Size:           3000000, // 3MB
--- a/trie_by_cid/trie/proof.go
+++ b/trie_by_cid/trie/proof.go
@@ -18,14 +18,16 @@ package trie

 import (
 	"bytes"
-	"errors"
 	"fmt"

-	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/trie"
 )

+var VerifyProof = trie.VerifyProof
+var VerifyRangeProof = trie.VerifyRangeProof
+
 // Prove constructs a merkle proof for key. The result contains all encoded nodes
 // on the path to the value at key. The value itself is also included in the last
 // node and can be retrieved by verifying the proof.
@@ -102,374 +104,3 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e
 func (t *StateTrie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) error {
 	return t.trie.Prove(key, fromLevel, proofDb)
 }
-
-// // VerifyProof checks merkle proofs. The given proof must contain the value for
-// // key in a trie with the given root hash. VerifyProof returns an error if the
-// // proof contains invalid trie nodes or the wrong value.
-// func VerifyProof(rootHash common.Hash, key []byte, proofDb ethdb.KeyValueReader) (value []byte, err error) {
-// 	key = keybytesToHex(key)
-// 	wantHash := rootHash
-// 	for i := 0; ; i++ {
-// 		buf, _ := proofDb.Get(wantHash[:])
-// 		if buf == nil {
-// 			return nil, fmt.Errorf("proof node %d (hash %064x) missing", i, wantHash)
-// 		}
-// 		n, err := decodeNode(wantHash[:], buf)
-// 		if err != nil {
-// 			return nil, fmt.Errorf("bad proof node %d: %v", i, err)
-// 		}
-// 		keyrest, cld := get(n, key, true)
-// 		switch cld := cld.(type) {
-// 		case nil:
-// 			// The trie doesn't contain the key.
-// 			return nil, nil
-// 		case hashNode:
-// 			key = keyrest
-// 			copy(wantHash[:], cld)
-// 		case valueNode:
-// 			return cld, nil
-// 		}
-// 	}
-// }
-
-// proofToPath converts a merkle proof to trie node path. The main purpose of
-// this function is recovering a node path from the merkle proof stream. All
-// necessary nodes will be resolved and leave the remaining as hashnode.
-//
-// The given edge proof is allowed to be an existent or non-existent proof.
-func proofToPath(rootHash common.Hash, root node, key []byte, proofDb ethdb.KeyValueReader, allowNonExistent bool) (node, []byte, error) {
-	// resolveNode retrieves and resolves trie node from merkle proof stream
-	resolveNode := func(hash common.Hash) (node, error) {
-		buf, _ := proofDb.Get(hash[:])
-		if buf == nil {
-			return nil, fmt.Errorf("proof node (hash %064x) missing", hash)
-		}
-		n, err := decodeNode(hash[:], buf)
-		if err != nil {
-			return nil, fmt.Errorf("bad proof node %v", err)
-		}
-		return n, err
-	}
-	// If the root node is empty, resolve it first.
-	// Root node must be included in the proof.
-	if root == nil {
-		n, err := resolveNode(rootHash)
-		if err != nil {
-			return nil, nil, err
-		}
-		root = n
-	}
-	var (
-		err           error
-		child, parent node
-		keyrest       []byte
-		valnode       []byte
-	)
-	key, parent = keybytesToHex(key), root
-	for {
-		keyrest, child = get(parent, key, false)
-		switch cld := child.(type) {
-		case nil:
-			// The trie doesn't contain the key. It's possible
-			// the proof is a non-existing proof, but at least
-			// we can prove all resolved nodes are correct, it's
-			// enough for us to prove range.
-			if allowNonExistent {
-				return root, nil, nil
-			}
-			return nil, nil, errors.New("the node is not contained in trie")
-		case *shortNode:
-			key, parent = keyrest, child // Already resolved
-			continue
-		case *fullNode:
-			key, parent = keyrest, child // Already resolved
-			continue
-		case hashNode:
-			child, err = resolveNode(common.BytesToHash(cld))
-			if err != nil {
-				return nil, nil, err
-			}
-		case valueNode:
-			valnode = cld
-		}
-		// Link the parent and child.
-		switch pnode := parent.(type) {
-		case *shortNode:
-			pnode.Val = child
-		case *fullNode:
-			pnode.Children[key[0]] = child
-		default:
-			panic(fmt.Sprintf("%T: invalid node: %v", pnode, pnode))
-		}
-		if len(valnode) > 0 {
-			return root, valnode, nil // The whole path is resolved
-		}
-		key, parent = keyrest, child
-	}
-}
-
-// unsetInternal removes all internal node references(hashnode, embedded node).
-// It should be called after a trie is constructed with two edge paths. Also
-// the given boundary keys must be the one used to construct the edge paths.
-//
-// It's the key step for range proof. All visited nodes should be marked dirty
-// since the node content might be modified. Besides it can happen that some
-// fullnodes only have one child which is disallowed. But if the proof is valid,
-// the missing children will be filled, otherwise it will be thrown anyway.
-//
-// Note we have the assumption here the given boundary keys are different
-// and right is larger than left.
-func unsetInternal(n node, left []byte, right []byte) (bool, error) {
-	left, right = keybytesToHex(left), keybytesToHex(right)
-
-	// Step down to the fork point. There are two scenarios can happen:
-	// - the fork point is a shortnode: either the key of left proof or
-	//   right proof doesn't match with shortnode's key.
-	// - the fork point is a fullnode: both two edge proofs are allowed
-	//   to point to a non-existent key.
-	var (
-		pos    = 0
-		parent node
-
-		// fork indicator, 0 means no fork, -1 means proof is less, 1 means proof is greater
-		shortForkLeft, shortForkRight int
-	)
-findFork:
-	for {
-		switch rn := (n).(type) {
-		case *shortNode:
-			rn.flags = nodeFlag{dirty: true}
-
-			// If either the key of left proof or right proof doesn't match with
-			// shortnode, stop here and the forkpoint is the shortnode.
-			if len(left)-pos < len(rn.Key) {
-				shortForkLeft = bytes.Compare(left[pos:], rn.Key)
-			} else {
-				shortForkLeft = bytes.Compare(left[pos:pos+len(rn.Key)], rn.Key)
-			}
-			if len(right)-pos < len(rn.Key) {
-				shortForkRight = bytes.Compare(right[pos:], rn.Key)
-			} else {
-				shortForkRight = bytes.Compare(right[pos:pos+len(rn.Key)], rn.Key)
-			}
-			if shortForkLeft != 0 || shortForkRight != 0 {
-				break findFork
-			}
-			parent = n
-			n, pos = rn.Val, pos+len(rn.Key)
-		case *fullNode:
-			rn.flags = nodeFlag{dirty: true}
-
-			// If either the node pointed by left proof or right proof is nil,
-			// stop here and the forkpoint is the fullnode.
-			leftnode, rightnode := rn.Children[left[pos]], rn.Children[right[pos]]
-			if leftnode == nil || rightnode == nil || leftnode != rightnode {
-				break findFork
-			}
-			parent = n
-			n, pos = rn.Children[left[pos]], pos+1
-		default:
-			panic(fmt.Sprintf("%T: invalid node: %v", n, n))
-		}
-	}
-	switch rn := n.(type) {
-	case *shortNode:
-		// There can have these five scenarios:
-		// - both proofs are less than the trie path => no valid range
-		// - both proofs are greater than the trie path => no valid range
-		// - left proof is less and right proof is greater => valid range, unset the shortnode entirely
-		// - left proof points to the shortnode, but right proof is greater
-		// - right proof points to the shortnode, but left proof is less
-		if shortForkLeft == -1 && shortForkRight == -1 {
-			return false, errors.New("empty range")
-		}
-		if shortForkLeft == 1 && shortForkRight == 1 {
-			return false, errors.New("empty range")
-		}
-		if shortForkLeft != 0 && shortForkRight != 0 {
-			// The fork point is root node, unset the entire trie
-			if parent == nil {
-				return true, nil
-			}
-			parent.(*fullNode).Children[left[pos-1]] = nil
-			return false, nil
-		}
-		// Only one proof points to non-existent key.
-		if shortForkRight != 0 {
-			if _, ok := rn.Val.(valueNode); ok {
-				// The fork point is root node, unset the entire trie
-				if parent == nil {
-					return true, nil
-				}
-				parent.(*fullNode).Children[left[pos-1]] = nil
-				return false, nil
-			}
-			return false, unset(rn, rn.Val, left[pos:], len(rn.Key), false)
-		}
-		if shortForkLeft != 0 {
-			if _, ok := rn.Val.(valueNode); ok {
-				// The fork point is root node, unset the entire trie
-				if parent == nil {
-					return true, nil
-				}
-				parent.(*fullNode).Children[right[pos-1]] = nil
-				return false, nil
-			}
-			return false, unset(rn, rn.Val, right[pos:], len(rn.Key), true)
-		}
-		return false, nil
-	case *fullNode:
-		// unset all internal nodes in the forkpoint
-		for i := left[pos] + 1; i < right[pos]; i++ {
-			rn.Children[i] = nil
-		}
-		if err := unset(rn, rn.Children[left[pos]], left[pos:], 1, false); err != nil {
-			return false, err
-		}
-		if err := unset(rn, rn.Children[right[pos]], right[pos:], 1, true); err != nil {
-			return false, err
-		}
-		return false, nil
-	default:
-		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
-	}
-}
-
-// unset removes all internal node references either the left most or right most.
-// It can meet these scenarios:
-//
-//   - The given path is existent in the trie, unset the associated nodes with the
-//     specific direction
-//   - The given path is non-existent in the trie
-//   - the fork point is a fullnode, the corresponding child pointed by path
-//     is nil, return
-//   - the fork point is a shortnode, the shortnode is included in the range,
-//     keep the entire branch and return.
-//   - the fork point is a shortnode, the shortnode is excluded in the range,
-//     unset the entire branch.
-func unset(parent node, child node, key []byte, pos int, removeLeft bool) error {
-	switch cld := child.(type) {
-	case *fullNode:
-		if removeLeft {
-			for i := 0; i < int(key[pos]); i++ {
-				cld.Children[i] = nil
-			}
-			cld.flags = nodeFlag{dirty: true}
-		} else {
-			for i := key[pos] + 1; i < 16; i++ {
-				cld.Children[i] = nil
-			}
-			cld.flags = nodeFlag{dirty: true}
-		}
-		return unset(cld, cld.Children[key[pos]], key, pos+1, removeLeft)
-	case *shortNode:
-		if len(key[pos:]) < len(cld.Key) || !bytes.Equal(cld.Key, key[pos:pos+len(cld.Key)]) {
-			// Find the fork point, it's an non-existent branch.
-			if removeLeft {
-				if bytes.Compare(cld.Key, key[pos:]) < 0 {
-					// The key of fork shortnode is less than the path
-					// (it belongs to the range), unset the entrie
-					// branch. The parent must be a fullnode.
-					fn := parent.(*fullNode)
-					fn.Children[key[pos-1]] = nil
-				}
-				//else {
-				// The key of fork shortnode is greater than the
-				// path(it doesn't belong to the range), keep
-				// it with the cached hash available.
-				//}
-			} else {
-				if bytes.Compare(cld.Key, key[pos:]) > 0 {
-					// The key of fork shortnode is greater than the
-					// path(it belongs to the range), unset the entrie
-					// branch. The parent must be a fullnode.
-					fn := parent.(*fullNode)
-					fn.Children[key[pos-1]] = nil
-				}
-				//else {
-				// The key of fork shortnode is less than the
-				// path(it doesn't belong to the range), keep
-				// it with the cached hash available.
-				//}
-			}
-			return nil
-		}
-		if _, ok := cld.Val.(valueNode); ok {
-			fn := parent.(*fullNode)
-			fn.Children[key[pos-1]] = nil
-			return nil
-		}
-		cld.flags = nodeFlag{dirty: true}
-		return unset(cld, cld.Val, key, pos+len(cld.Key), removeLeft)
-	case nil:
-		// If the node is nil, then it's a child of the fork point
-		// fullnode(it's a non-existent branch).
-		return nil
-	default:
-		panic("it shouldn't happen") // hashNode, valueNode
-	}
-}
-
-// hasRightElement returns the indicator whether there exists more elements
-// on the right side of the given path. The given path can point to an existent
-// key or a non-existent one. This function has the assumption that the whole
-// path should already be resolved.
-func hasRightElement(node node, key []byte) bool {
-	pos, key := 0, keybytesToHex(key)
-	for node != nil {
-		switch rn := node.(type) {
-		case *fullNode:
-			for i := key[pos] + 1; i < 16; i++ {
-				if rn.Children[i] != nil {
-					return true
-				}
-			}
-			node, pos = rn.Children[key[pos]], pos+1
-		case *shortNode:
-			if len(key)-pos < len(rn.Key) || !bytes.Equal(rn.Key, key[pos:pos+len(rn.Key)]) {
-				return bytes.Compare(rn.Key, key[pos:]) > 0
-			}
-			node, pos = rn.Val, pos+len(rn.Key)
-		case valueNode:
-			return false // We have resolved the whole path
-		default:
-			panic(fmt.Sprintf("%T: invalid node: %v", node, node)) // hashnode
-		}
-	}
-	return false
-}
-
-// get returns the child of the given node. Return nil if the
-// node with specified key doesn't exist at all.
-//
-// There is an additional flag `skipResolved`. If it's set then
-// all resolved nodes won't be returned.
-func get(tn node, key []byte, skipResolved bool) ([]byte, node) {
-	for {
-		switch n := tn.(type) {
-		case *shortNode:
-			if len(key) < len(n.Key) || !bytes.Equal(n.Key, key[:len(n.Key)]) {
-				return nil, nil
-			}
-			tn = n.Val
-			key = key[len(n.Key):]
-			if !skipResolved {
-				return key, tn
-			}
-		case *fullNode:
-			tn = n.Children[key[0]]
-			key = key[1:]
-			if !skipResolved {
-				return key, tn
-			}
-		case hashNode:
-			return key, n
-		case nil:
-			return key, nil
-		case valueNode:
-			return nil, n
-		default:
-			panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
-		}
-	}
-}
--- a/trie_by_cid/trie/proof_test.go
+++ b/trie_by_cid/trie/proof_test.go
--- a/trie_by_cid/trie/util_test.go
+++ b/trie_by_cid/trie/util_test.go
@@ -2,10 +2,149 @@ package trie_test

 import (
 	"fmt"
+	"math/big"
+	"math/rand"
+	"testing"

 	"github.com/jmoiron/sqlx"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	geth_state "github.com/ethereum/go-ethereum/core/state"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/rlp"
+	geth_trie "github.com/ethereum/go-ethereum/trie"
+
+	pgipfsethdb "github.com/cerc-io/ipfs-ethdb/v5/postgres/v0"
+	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/helper"
+	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/state"
+	"github.com/cerc-io/ipld-eth-statedb/trie_by_cid/trie"
+	"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"
+	"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
+	"github.com/ethereum/go-ethereum/statediff/test_helpers"
 )

+type kv struct {
+	k []byte
+	v int64
+}
+
+type kvMap map[string]*kv
+
+type kvs struct {
+	k string
+	v int64
+}
+
+func packValue(val int64) []byte {
+	acct := &types.StateAccount{
+		Balance:  big.NewInt(val),
+		CodeHash: test_helpers.NullCodeHash.Bytes(),
+		Root:     test_helpers.EmptyContractRoot,
+	}
+	acct_rlp, err := rlp.EncodeToBytes(acct)
+	if err != nil {
+		panic(err)
+	}
+	return acct_rlp
+}
+
+func unpackValue(val []byte) int64 {
+	var acct types.StateAccount
+	if err := rlp.DecodeBytes(val, &acct); err != nil {
+		panic(err)
+	}
+	return acct.Balance.Int64()
+}
+
+func updateTrie(tr *geth_trie.Trie, vals []kvs) (kvMap, error) {
+	all := kvMap{}
+	for _, val := range vals {
+		all[string(val.k)] = &kv{[]byte(val.k), val.v}
+		tr.Update([]byte(val.k), packValue(val.v))
+	}
+	return all, nil
+}
+
+func commitTrie(t testing.TB, db *geth_trie.Database, tr *geth_trie.Trie) common.Hash {
+	t.Helper()
+	root, nodes := tr.Commit(false)
+	if err := db.Update(geth_trie.NewWithNodeSet(nodes)); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.Commit(root, false); err != nil {
+		t.Fatal(err)
+	}
+	return root
+}
+
+// commit a LevelDB state trie, index to IPLD and return new trie
+func indexTrie(t testing.TB, edb ethdb.Database, root common.Hash) *trie.Trie {
+	t.Helper()
+	dbConfig.Driver = postgres.PGX
+	err := helper.IndexChain(dbConfig, geth_state.NewDatabase(edb), common.Hash{}, root)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	pg_db, err := postgres.ConnectSQLX(ctx, dbConfig)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() {
+		if err := TearDownDB(pg_db); err != nil {
+			t.Fatal(err)
+		}
+	})
+
+	ipfs_db := pgipfsethdb.NewDatabase(pg_db, makeCacheConfig(t))
+	sdb_db := state.NewDatabase(ipfs_db)
+	tr, err := trie.New(common.Hash{}, root, sdb_db.TrieDB(), ipld.MEthStateTrie)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return tr
+}
+
+// generates a random Geth LevelDB trie of n key-value pairs and corresponding value map
+func randomGethTrie(n int, db *geth_trie.Database) (*geth_trie.Trie, kvMap) {
+	trie := geth_trie.NewEmpty(db)
+	var vals []*kv
+	for i := byte(0); i < 100; i++ {
+		e := &kv{common.LeftPadBytes([]byte{i}, 32), int64(i)}
+		e2 := &kv{common.LeftPadBytes([]byte{i + 10}, 32), int64(i)}
+		vals = append(vals, e, e2)
+	}
+	for i := 0; i < n; i++ {
+		k := randBytes(32)
+		v := rand.Int63()
+		vals = append(vals, &kv{k, v})
+	}
+	all := kvMap{}
+	for _, val := range vals {
+		all[string(val.k)] = &kv{[]byte(val.k), val.v}
+		trie.Update([]byte(val.k), packValue(val.v))
+	}
+	return trie, all
+}
+
+// generates a random IPLD-indexed trie
+func randomTrie(t testing.TB, n int) (*trie.Trie, kvMap) {
+	edb := rawdb.NewMemoryDatabase()
+	db := geth_trie.NewDatabase(edb)
+	orig, vals := randomGethTrie(n, db)
+	root := commitTrie(t, db, orig)
+	trie := indexTrie(t, edb, root)
+	return trie, vals
+}
+
+func randBytes(n int) []byte {
+	r := make([]byte, n)
+	rand.Read(r)
+	return r
+}
+
 // TearDownDB is used to tear down the watcher dbs after tests
 func TearDownDB(db *sqlx.DB) error {
 	tx, err := db.Beginx()