From 33e3d2f0ec5196913bf6f979486cb1afad9afcf7 Mon Sep 17 00:00:00 2001 From: Roy Crihfield Date: Tue, 28 Mar 2023 21:04:05 +0800 Subject: [PATCH] Implement StateDB using IPFS-backed trie and supporting types --- bycid/state/database.go | 128 +++++++++ bycid/state/state_object.go | 211 +++++++++++++++ bycid/state/statedb.go | 369 +++++++++++++++++++++++++ bycid/trie/database.go | 131 +++++++++ bycid/trie/database_test.go | 32 +++ bycid/trie/encoding.go | 33 +++ bycid/trie/errors.go | 46 ++++ bycid/trie/hasher.go | 210 +++++++++++++++ bycid/trie/node.go | 242 +++++++++++++++++ bycid/trie/node_enc.go | 60 +++++ bycid/trie/node_test.go | 94 +++++++ bycid/trie/proof.go | 475 +++++++++++++++++++++++++++++++++ bycid/trie/secure_trie.go | 114 ++++++++ bycid/trie/trie.go | 155 +++++++++++ ipld/util.go | 36 +++ pkg/trie_builder_utils/util.go | 12 +- 16 files changed, 2342 insertions(+), 6 deletions(-) create mode 100644 bycid/state/database.go create mode 100644 bycid/state/state_object.go create mode 100644 bycid/state/statedb.go create mode 100644 bycid/trie/database.go create mode 100644 bycid/trie/database_test.go create mode 100644 bycid/trie/encoding.go create mode 100644 bycid/trie/errors.go create mode 100644 bycid/trie/hasher.go create mode 100644 bycid/trie/node.go create mode 100644 bycid/trie/node_enc.go create mode 100644 bycid/trie/node_test.go create mode 100644 bycid/trie/proof.go create mode 100644 bycid/trie/secure_trie.go create mode 100644 bycid/trie/trie.go create mode 100644 ipld/util.go diff --git a/bycid/state/database.go b/bycid/state/database.go new file mode 100644 index 0000000..c1ff9a8 --- /dev/null +++ b/bycid/state/database.go @@ -0,0 +1,128 @@ +package state + +import ( + "errors" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/statediff/indexer/ipld" + lru 
"github.com/hashicorp/golang-lru" + + "github.com/cerc-io/ipld-eth-utils/bycid/trie" +) + +const ( + // Number of codehash->size associations to keep. + codeSizeCacheSize = 100000 + + // Cache size granted for caching clean code. + codeCacheSize = 64 * 1024 * 1024 +) + +// Database wraps access to tries and contract code. +type Database interface { + // OpenTrie opens the main account trie. + OpenTrie(root common.Hash) (Trie, error) + + // OpenStorageTrie opens the storage trie of an account. + OpenStorageTrie(addrHash, root common.Hash) (Trie, error) + + // ContractCode retrieves a particular contract's code. + ContractCode(codeHash common.Hash) ([]byte, error) + + // ContractCodeSize retrieves the size of a particular contract's code. + ContractCodeSize(codeHash common.Hash) (int, error) + + // TrieDB retrieves the low level trie database used for data storage. + TrieDB() *trie.Database +} + +// Trie is an Ethereum Merkle Patricia trie. Only read operations (lookups, +// the root hash and proof generation) are part of this interface. +type Trie interface { + TryGet(key []byte) ([]byte, error) + TryGetAccount(key []byte) (*types.StateAccount, error) + Hash() common.Hash + // NodeIterator(startKey []byte) trie.NodeIterator + Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) error +} + +// NewDatabase creates a backing store for state. The returned database is safe for +// concurrent use, but does not retain any recent trie nodes in memory. To keep some +// historical state in memory, use the NewDatabaseWithConfig constructor. +func NewDatabase(db ethdb.Database) Database { + return NewDatabaseWithConfig(db, nil) +} + +// NewDatabaseWithConfig creates a backing store for state. The returned database +// is safe for concurrent use and retains a lot of collapsed RLP trie nodes in a +// large memory cache. 
+func NewDatabaseWithConfig(db ethdb.Database, config *trie.Config) Database { + // NOTE(review): the error from lru.New is discarded; presumably it can only + // fail for a non-positive size, which the constant rules out - confirm. + csc, _ := lru.New(codeSizeCacheSize) + return &cachingDB{ + db: trie.NewDatabaseWithConfig(db, config), + codeSizeCache: csc, + codeCache: fastcache.New(codeCacheSize), + } +} + +// cachingDB implements Database on top of a trie.Database, adding caches for +// contract code and contract code sizes. +type cachingDB struct { + db *trie.Database + codeSizeCache *lru.Cache + codeCache *fastcache.Cache +} + +// OpenTrie opens the main account trie at a specific root hash. +func (db *cachingDB) OpenTrie(root common.Hash) (Trie, error) { + tr, err := trie.NewStateTrie(common.Hash{}, root, db.db) + if err != nil { + return nil, err + } + return tr, nil +} + +// OpenStorageTrie opens the storage trie of an account. +func (db *cachingDB) OpenStorageTrie(addrHash, root common.Hash) (Trie, error) { + tr, err := trie.NewStorageTrie(addrHash, root, db.db) + if err != nil { + return nil, err + } + return tr, nil +} + +// ContractCode retrieves a particular contract's code. The code cache is +// consulted first; on a miss the code is read from the disk database under the +// CID derived from codeHash and, if non-empty, stored in both caches. An empty +// result is reported as a "not found" error. +func (db *cachingDB) ContractCode(codeHash common.Hash) ([]byte, error) { + if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { + return code, nil + } + // TODO - use a non-panicking conversion + codeCID := ipld.Keccak256ToCid(ipld.RawBinary, codeHash.Bytes()) + // if err != nil { + // return nil, err + // } + code, err := db.db.DiskDB().Get(codeCID.Bytes()) + if err != nil { + return nil, err + } + if len(code) > 0 { + db.codeCache.Set(codeHash.Bytes(), code) + db.codeSizeCache.Add(codeHash, len(code)) + return code, nil + } + return nil, errors.New("not found") +} + +// ContractCodeSize retrieves the size of a particular contract's code, using +// the size cache when possible and loading the code otherwise. +func (db *cachingDB) ContractCodeSize(codeHash common.Hash) (int, error) { + if cached, ok := db.codeSizeCache.Get(codeHash); ok { + return cached.(int), nil + } + code, err := db.ContractCode(codeHash) + return len(code), err +} + +// TrieDB retrieves any intermediate trie-node caching layer. 
+func (db *cachingDB) TrieDB() *trie.Database { + return db.db +} diff --git a/bycid/state/state_object.go b/bycid/state/state_object.go new file mode 100644 index 0000000..6b53c67 --- /dev/null +++ b/bycid/state/state_object.go @@ -0,0 +1,211 @@ +package state + +import ( + "bytes" + "fmt" + "math/big" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" +) + +var ( + // emptyRoot is the known root hash of an empty trie. + // this is calculated as: emptyRoot = crypto.Keccak256(rlp.Encode([][]byte{})) + // that is, the keccak256 hash of the rlp encoding of an empty trie node (empty byte slice array) + emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") + // emptyCodeHash is the CodeHash for an EOA, for an account without contract code deployed + emptyCodeHash = crypto.Keccak256(nil) +) + +// Code is contract bytecode. +type Code []byte + +// String returns the raw bytes of the code converted to a string. +func (c Code) String() string { + return string(c) //strings.Join(Disassemble(c), " ") +} + +// Storage maps storage slot keys to their values. +type Storage map[common.Hash]common.Hash + +// String renders every entry as "KEY : VALUE" in upper-case hex, one per line. +// Entries appear in map iteration order. +func (s Storage) String() (str string) { + for key, value := range s { + str += fmt.Sprintf("%X : %X\n", key, value) + } + + return +} + +// Copy returns a new Storage holding the same entries. +func (s Storage) Copy() Storage { + cpy := make(Storage, len(s)) + for key, value := range s { + cpy[key] = value + } + + return cpy +} + +// stateObject represents an Ethereum account which is being accessed. +// +// The usage pattern is as follows: +// First you need to obtain a state object. +// Account values can be accessed through the object. +type stateObject struct { + address common.Address + addrHash common.Hash // hash of ethereum address of the account + data types.StateAccount + db *StateDB + + // Caches. 
+ trie Trie // storage trie, which becomes non-nil on first access + code Code // contract bytecode, which gets set when code is loaded + + originStorage Storage // Storage cache of original entries to dedup rewrites, reset for every transaction + fakeStorage Storage // Fake storage constructed by the caller for debugging purposes. +} + +// empty returns whether the account is considered empty: zero nonce, zero +// balance and the empty code hash. +func (s *stateObject) empty() bool { + return s.data.Nonce == 0 && s.data.Balance.Sign() == 0 && bytes.Equal(s.data.CodeHash, emptyCodeHash) +} + +// newObject creates a state object. Missing account fields are filled with +// defaults: a zero balance, the empty code hash and the empty trie root. +func newObject(db *StateDB, address common.Address, data types.StateAccount) *stateObject { + if data.Balance == nil { + data.Balance = new(big.Int) + } + if data.CodeHash == nil { + data.CodeHash = emptyCodeHash + } + if data.Root == (common.Hash{}) { + data.Root = emptyRoot + } + return &stateObject{ + db: db, + address: address, + addrHash: crypto.Keccak256Hash(address[:]), + data: data, + originStorage: make(Storage), + } +} + +// setError forwards err to the owning StateDB, which records it. +func (s *stateObject) setError(err error) { + s.db.setError(err) +} + +// getTrie returns the account's storage trie, opening it through db on first +// use. If opening at the account's root fails, the error is recorded and a +// trie is opened at the zero hash instead. +func (s *stateObject) getTrie(db Database) Trie { + if s.trie == nil { + // // Try fetching from prefetcher first + // // We don't prefetch empty tries + // if s.data.Root != emptyRoot && s.db.prefetcher != nil { + // // When the miner is creating the pending state, there is no + // // prefetcher + // s.trie = s.db.prefetcher.trie(s.addrHash, s.data.Root) + // } + if s.trie == nil { + var err error + s.trie, err = db.OpenStorageTrie(s.addrHash, s.data.Root) + if err != nil { + s.trie, _ = db.OpenStorageTrie(s.addrHash, common.Hash{}) + s.setError(fmt.Errorf("can't create storage trie: %w", err)) + } + } + } + return s.trie +} + +// GetState retrieves a value from the account storage trie, consulting the +// fake storage and the originStorage cache first. 
+func (s *stateObject) GetState(db Database, key common.Hash) common.Hash { + // If the fake storage is set, only look up the state here (debugging mode) + if s.fakeStorage != nil { + return s.fakeStorage[key] + } + // If we have a cached value, return that + if value, cached := s.originStorage[key]; cached { + return value + } + // Not cached yet: load the value from the storage trie. + start := time.Now() + enc, err := s.getTrie(db).TryGet(key.Bytes()) + if metrics.EnabledExpensive { + s.db.StorageReads += time.Since(start) + } + if err != nil { + s.setError(err) + return common.Hash{} + } + var value common.Hash + if len(enc) > 0 { + _, content, _, err := rlp.Split(enc) + if err != nil { + s.setError(err) + } + // Note: a split error is only recorded; the value is still set from + // content and cached below. + value.SetBytes(content) + } + s.originStorage[key] = value + return value +} + +// +// Attribute accessors +// + +// Address returns the address of the contract/account +func (s *stateObject) Address() common.Address { + return s.address +} + +// Code returns the contract code associated with this object, if any. +// A load failure is recorded via setError and the (possibly nil) result is +// still stored on the object. +func (s *stateObject) Code(db Database) []byte { + if s.code != nil { + return s.code + } + if bytes.Equal(s.CodeHash(), emptyCodeHash) { + return nil + } + code, err := db.ContractCode(common.BytesToHash(s.CodeHash())) + if err != nil { + s.setError(fmt.Errorf("can't load code hash %x: %v", s.CodeHash(), err)) + } + s.code = code + return code +} + +// CodeSize returns the size of the contract code associated with this object, +// or zero if none. This method almost mirrors Code, but uses a cache +// inside the database to avoid loading codes seen recently. 
+func (s *stateObject) CodeSize(db Database) int { + if s.code != nil { + return len(s.code) + } + if bytes.Equal(s.CodeHash(), emptyCodeHash) { + return 0 + } + size, err := db.ContractCodeSize(common.BytesToHash(s.CodeHash())) + if err != nil { + s.setError(fmt.Errorf("can't load code size %x: %v", s.CodeHash(), err)) + } + return size +} + +// CodeHash returns the account's code hash. +func (s *stateObject) CodeHash() []byte { + return s.data.CodeHash +} + +// Balance returns the account's balance. The returned pointer is the one held +// by the state object, not a copy. +func (s *stateObject) Balance() *big.Int { + return s.data.Balance +} + +// Nonce returns the account's nonce. +func (s *stateObject) Nonce() uint64 { + return s.data.Nonce +} + +// Never called, but must be present to allow stateObject to be used +// as a vm.Account interface that also satisfies the vm.ContractRef +// interface. Interfaces are awesome. +func (s *stateObject) Value() *big.Int { + panic("Value on stateObject should never be called") +} diff --git a/bycid/state/statedb.go b/bycid/state/statedb.go new file mode 100644 index 0000000..fc0c0b8 --- /dev/null +++ b/bycid/state/statedb.go @@ -0,0 +1,369 @@ +package state + +import ( + "errors" + "fmt" + "math/big" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/metrics" +) + +// proofList collects the node values written while generating a Merkle proof. +type proofList [][]byte + +// Put appends value to the list; the key is ignored. +func (n *proofList) Put(key []byte, value []byte) error { + *n = append(*n, value) + return nil +} + +// Delete is not supported and always panics. +func (n *proofList) Delete(key []byte) error { + panic("not supported") +} + +// StateDB structs within the ethereum protocol are used to store anything +// within the merkle trie. StateDBs take care of caching and storing +// nested states. It's the general query interface to retrieve: +// * Contracts +// * Accounts +// +// This implementation is read-only and performs no journaling or prefetching; +// the only measurements kept are the AccountReads and StorageReads timings. +type StateDB struct { + db Database + trie Trie + hasher crypto.KeccakState + + // This map caches the state objects that have been loaded so far. 
+ stateObjects map[common.Address]*stateObject + + // DB error. + // State objects are used by the consensus core and VM which are + // unable to deal with database-level errors. The first error that occurs + // during a database read is memoized here and can be retrieved with + // StateDB.Error. + dbErr error + + preimages map[common.Hash][]byte + + // Measurements gathered during execution for debugging purposes + AccountReads time.Duration + StorageReads time.Duration +} + +// New creates a new state from a given trie. +func New(root common.Hash, db Database) (*StateDB, error) { + tr, err := db.OpenTrie(root) + if err != nil { + return nil, err + } + sdb := &StateDB{ + db: db, + trie: tr, + stateObjects: make(map[common.Address]*stateObject), + preimages: make(map[common.Hash][]byte), + hasher: crypto.NewKeccakState(), + } + return sdb, nil +} + +// setError remembers the first non-nil error it is called with. +func (s *StateDB) setError(err error) { + if s.dbErr == nil { + s.dbErr = err + } +} + +// Error returns the memoized database error, if any. +func (s *StateDB) Error() error { + return s.dbErr +} + +// AddLog is unsupported in this read-only implementation and always panics. +func (s *StateDB) AddLog(log *types.Log) { + panic("unsupported") +} + +// AddPreimage records a SHA3 preimage seen by the VM. The preimage is copied, +// and an already recorded hash is left untouched. +func (s *StateDB) AddPreimage(hash common.Hash, preimage []byte) { + if _, ok := s.preimages[hash]; !ok { + pi := make([]byte, len(preimage)) + copy(pi, preimage) + s.preimages[hash] = pi + } +} + +// Preimages returns the map of SHA3 preimages that have been submitted. +func (s *StateDB) Preimages() map[common.Hash][]byte { + return s.preimages +} + +// AddRefund is unsupported in this read-only implementation and always panics. +func (s *StateDB) AddRefund(gas uint64) { + panic("unsupported") +} + +// SubRefund is unsupported in this read-only implementation and always panics. +func (s *StateDB) SubRefund(gas uint64) { + panic("unsupported") +} + +// Exist reports whether the given account address exists in the state. +// Notably this also returns true for suicided accounts. 
+func (s *StateDB) Exist(addr common.Address) bool { + return s.getStateObject(addr) != nil +} + +// Empty returns whether the state object is either non-existent +// or empty according to the EIP161 specification (balance = nonce = code = 0) +func (s *StateDB) Empty(addr common.Address) bool { + so := s.getStateObject(addr) + return so == nil || so.empty() +} + +// GetBalance retrieves the balance from the given address or 0 if object not found. +// The zero result is the shared common.Big0 value. +func (s *StateDB) GetBalance(addr common.Address) *big.Int { + stateObject := s.getStateObject(addr) + if stateObject != nil { + return stateObject.Balance() + } + return common.Big0 +} + +// GetNonce retrieves the nonce of the given address or 0 if object not found. +func (s *StateDB) GetNonce(addr common.Address) uint64 { + stateObject := s.getStateObject(addr) + if stateObject != nil { + return stateObject.Nonce() + } + + return 0 +} + +// GetCode retrieves the contract code of the given address or nil if object not found. +func (s *StateDB) GetCode(addr common.Address) []byte { + stateObject := s.getStateObject(addr) + if stateObject != nil { + return stateObject.Code(s.db) + } + return nil +} + +// GetCodeSize retrieves the contract code size of the given address or 0 if object not found. +func (s *StateDB) GetCodeSize(addr common.Address) int { + stateObject := s.getStateObject(addr) + if stateObject != nil { + return stateObject.CodeSize(s.db) + } + return 0 +} + +// GetCodeHash retrieves the code hash of the given address or the zero hash if +// object not found. +func (s *StateDB) GetCodeHash(addr common.Address) common.Hash { + stateObject := s.getStateObject(addr) + if stateObject == nil { + return common.Hash{} + } + return common.BytesToHash(stateObject.CodeHash()) +} + +// GetState retrieves a value from the given account's storage trie, or the +// zero hash if the account is not found. +func (s *StateDB) GetState(addr common.Address, hash common.Hash) common.Hash { + stateObject := s.getStateObject(addr) + if stateObject != nil { + return stateObject.GetState(s.db, hash) + } + return common.Hash{} +} + +// GetProof returns the Merkle proof for a given account. +func (s *StateDB) GetProof(addr common.Address) ([][]byte, error) { + return s.GetProofByHash(crypto.Keccak256Hash(addr.Bytes())) +} + +// GetProofByHash returns the Merkle proof for the account with the given address hash. 
+func (s *StateDB) GetProofByHash(addrHash common.Hash) ([][]byte, error) { + var proof proofList + err := s.trie.Prove(addrHash[:], 0, &proof) + return proof, err +} + +// GetStorageProof returns the Merkle proof for given storage slot. The proof is +// generated for the Keccak256 hash of key; an error is returned when the +// account has no state object. +func (s *StateDB) GetStorageProof(a common.Address, key common.Hash) ([][]byte, error) { + var proof proofList + trie := s.StorageTrie(a) + if trie == nil { + return proof, errors.New("storage trie for requested address does not exist") + } + err := trie.Prove(crypto.Keccak256(key.Bytes()), 0, &proof) + return proof, err +} + +// GetCommittedState retrieves a value from the given account's storage trie. +// In this implementation it simply delegates to GetState. +func (s *StateDB) GetCommittedState(addr common.Address, hash common.Hash) common.Hash { + return s.GetState(addr, hash) +} + +// Database retrieves the low level database supporting the lower level trie ops. +func (s *StateDB) Database() Database { + return s.db +} + +// StorageTrie returns the storage trie of an account. +// The return value is the trie held by the account's state object (not a copy) +// and is nil for non-existent accounts. +func (s *StateDB) StorageTrie(addr common.Address) Trie { + stateObject := s.getStateObject(addr) + if stateObject == nil { + return nil + } + return stateObject.getTrie(s.db) +} + +// HasSuicided always reports false in this implementation. +func (s *StateDB) HasSuicided(addr common.Address) bool { + return false +} + +/* + * SETTERS + */ + +// AddBalance is unsupported in this read-only implementation and always panics. +func (s *StateDB) AddBalance(addr common.Address, amount *big.Int) { + panic("unsupported") +} + +// SubBalance is unsupported in this read-only implementation and always panics. 
+func (s *StateDB) SubBalance(addr common.Address, amount *big.Int) { + panic("unsupported") +} + +// SetBalance is unsupported in this read-only implementation and always panics. +func (s *StateDB) SetBalance(addr common.Address, amount *big.Int) { + panic("unsupported") +} + +// SetNonce is unsupported in this read-only implementation and always panics. +func (s *StateDB) SetNonce(addr common.Address, nonce uint64) { + panic("unsupported") +} + +// SetCode is unsupported in this read-only implementation and always panics. +func (s *StateDB) SetCode(addr common.Address, code []byte) { + panic("unsupported") +} + +// SetState is unsupported in this read-only implementation and always panics. +func (s *StateDB) SetState(addr common.Address, key, value common.Hash) { + panic("unsupported") +} + +// SetStorage is unsupported in this read-only implementation and always panics. +func (s *StateDB) SetStorage(addr common.Address, storage map[common.Hash]common.Hash) { + panic("unsupported") +} + +// Suicide is unsupported in this read-only implementation and always panics; +// the return statement after the panic is never reached. +func (s *StateDB) Suicide(addr common.Address) bool { + panic("unsupported") + return false +} + +// +// Setting, updating & deleting state object methods. +// + +// getStateObject retrieves a state object given by the address, returning nil if +// the object is not found or could not be loaded from the trie. 
+func (s *StateDB) getStateObject(addr common.Address) *stateObject { + // Prefer an already loaded object if one is available + if obj := s.stateObjects[addr]; obj != nil { + return obj + } + // Otherwise load the account from the trie + start := time.Now() + var err error + data, err := s.trie.TryGetAccount(addr.Bytes()) + if metrics.EnabledExpensive { + s.AccountReads += time.Since(start) + } + if err != nil { + s.setError(fmt.Errorf("getStateObject (%x) error: %w", addr.Bytes(), err)) + return nil + } + if data == nil { + return nil + } + + // Insert into the live set + obj := newObject(s, addr, *data) + s.setStateObject(obj) + return obj +} + +// setStateObject stores object in the live set, keyed by its address. +func (s *StateDB) setStateObject(object *stateObject) { + s.stateObjects[object.Address()] = object +} + +// CreateAccount is unsupported in this read-only implementation and always panics. +func (s *StateDB) CreateAccount(addr common.Address) { + panic("unsupported") +} + +// ForEachStorage is a no-op in this implementation: cb is never invoked and nil +// is returned. +func (db *StateDB) ForEachStorage(addr common.Address, cb func(key, value common.Hash) bool) error { + return nil +} + +// Snapshot always returns 0; this implementation keeps no revisions. +func (s *StateDB) Snapshot() int { + return 0 +} + +// RevertToSnapshot is unsupported in this read-only implementation and always panics. +func (s *StateDB) RevertToSnapshot(revid int) { + panic("unsupported") +} + +// GetRefund is unsupported in this read-only implementation and always panics. 
+func (s *StateDB) GetRefund() uint64 { + panic("unsupported") + return 0 +} + +// PrepareAccessList is unsupported in this read-only implementation and always panics. +func (s *StateDB) PrepareAccessList(sender common.Address, dst *common.Address, precompiles []common.Address, list types.AccessList) { + panic("unsupported") +} + +// AddAddressToAccessList is unsupported in this read-only implementation and always panics. +func (s *StateDB) AddAddressToAccessList(addr common.Address) { + panic("unsupported") +} + +// AddSlotToAccessList is unsupported in this read-only implementation and always panics. +func (s *StateDB) AddSlotToAccessList(addr common.Address, slot common.Hash) { + panic("unsupported") +} + +// AddressInAccessList always reports false; no access list is kept. +func (s *StateDB) AddressInAccessList(addr common.Address) bool { + return false +} + +// SlotInAccessList always reports false for both results; no access list is kept. +func (s *StateDB) SlotInAccessList(addr common.Address, slot common.Hash) (addressPresent bool, slotPresent bool) { + return +} diff --git a/bycid/trie/database.go b/bycid/trie/database.go new file mode 100644 index 0000000..82bad23 --- /dev/null +++ b/bycid/trie/database.go @@ -0,0 +1,131 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "errors" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" +) + +// CidBytes is the key type used for node lookups in the Database. +// NOTE(review): presumably the binary encoding of a CID - confirm against callers. +type CidBytes = []byte + +// isEmpty reports whether key has zero length. +func isEmpty(key CidBytes) bool { + return len(key) == 0 +} + +// Database is an intermediate read-only layer between the trie data structures and +// the disk database. This trie Database is thread safe in providing individual, +// independent node access. +type Database struct { + diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes + cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs +} + +// Config defines all necessary options for database. +// (re-export) +type Config = trie.Config + +// NewDatabase creates a new trie database to store ephemeral trie content before +// it's written out to disk or garbage collected. No read cache is created, so all +// data retrievals will hit the underlying disk database. +func NewDatabase(diskdb ethdb.KeyValueStore) *Database { + return NewDatabaseWithConfig(diskdb, nil) +} + +// NewDatabaseWithConfig creates a new trie database to store ephemeral trie content +// before it's written out to disk or garbage collected. It also acts as a read cache +// for nodes loaded from disk. 
+func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database { + // A clean cache is only created when the config asks for a positive size + // (config.Cache is multiplied by 1024*1024 to get the byte size). + var cleans *fastcache.Cache + if config != nil && config.Cache > 0 { + if config.Journal == "" { + cleans = fastcache.New(config.Cache * 1024 * 1024) + } else { + cleans = fastcache.LoadFromFileOrNew(config.Journal, config.Cache*1024*1024) + } + } + db := &Database{ + diskdb: diskdb, + cleans: cleans, + } + return db +} + +// DiskDB retrieves the persistent storage backing the trie database. +func (db *Database) DiskDB() ethdb.KeyValueStore { + return db.diskdb +} + +// node retrieves a decoded trie node, first from the clean cache and then from +// the disk database. It returns a nil node and nil error when the disk lookup +// yields a nil value. +func (db *Database) node(key CidBytes) (node, error) { + // Retrieve the node from the clean cache if available + if db.cleans != nil { + if enc := db.cleans.Get(nil, key); enc != nil { + // The returned value from cache is in its own copy, + // safe to use decodeNodeUnsafe for decoding. + return decodeNodeUnsafe(key, enc) + } + } + + // Content unavailable in memory, attempt to retrieve from disk + enc, err := db.diskdb.Get(key) + if err != nil { + return nil, err + } + if enc == nil { + return nil, nil + } + if db.cleans != nil { + db.cleans.Set(key, enc) + } + // The returned value from database is in its own copy, + // safe to use decodeNodeUnsafe for decoding. + return decodeNodeUnsafe(key, enc) +} + +// Node retrieves an encoded cached trie node from memory. If it cannot be found +// cached, the method queries the persistent database for the content. 
+func (db *Database) Node(key CidBytes) ([]byte, error) { + // It doesn't make sense to retrieve the metaroot (empty key) + if isEmpty(key) { + return nil, errors.New("not found") + } + // Retrieve the node from the clean cache if available + if db.cleans != nil { + if enc := db.cleans.Get(nil, key); enc != nil { + return enc, nil + } + } + + // Content unavailable in memory, attempt to retrieve from disk + enc, err := db.diskdb.Get(key) + if err != nil { + return nil, err + } + + if len(enc) != 0 { + if db.cleans != nil { + db.cleans.Set(key[:], enc) + } + return enc, nil + } + // An empty value from disk is reported the same way as a missing node. + return nil, errors.New("not found") +} diff --git a/bycid/trie/database_test.go b/bycid/trie/database_test.go new file mode 100644 index 0000000..156ff18 --- /dev/null +++ b/bycid/trie/database_test.go @@ -0,0 +1,32 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "testing" + + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +// Tests that the trie database returns an error if attempting +// to retrieve the meta root. 
+func TestDatabaseMetarootFetch(t *testing.T) { + db := NewDatabase(memorydb.New()) + if _, err := db.Node(CidBytes(nil)); err == nil { + t.Fatalf("metaroot retrieval succeeded") + } +} diff --git a/bycid/trie/encoding.go b/bycid/trie/encoding.go new file mode 100644 index 0000000..d564701 --- /dev/null +++ b/bycid/trie/encoding.go @@ -0,0 +1,33 @@ +// Copyright 2014 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +// keybytesToHex expands each byte of str into two nibbles (high nibble first) +// and appends the terminator value 16, so the result has len(str)*2+1 entries. +func keybytesToHex(str []byte) []byte { + l := len(str)*2 + 1 + var nibbles = make([]byte, l) + for i, b := range str { + nibbles[i*2] = b / 16 + nibbles[i*2+1] = b % 16 + } + nibbles[l-1] = 16 + return nibbles +} + +// hasTerm returns whether a hex key has the terminator flag, i.e. it is +// non-empty and its last element is 16. +func hasTerm(s []byte) bool { + return len(s) > 0 && s[len(s)-1] == 16 +} diff --git a/bycid/trie/errors.go b/bycid/trie/errors.go new file mode 100644 index 0000000..3881487 --- /dev/null +++ b/bycid/trie/errors.go @@ -0,0 +1,46 @@ +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "fmt" + + "github.com/ethereum/go-ethereum/common" +) + +// MissingNodeError is returned by the trie functions (TryGet, TryUpdate, TryDelete) +// in the case where a trie node is not present in the local database. It contains +// information necessary for retrieving the missing node. +type MissingNodeError struct { + Owner common.Hash // owner of the trie if it's 2-layered trie + NodeHash []byte // hash of the missing node + Path []byte // hex-encoded path to the missing node + err error // concrete error for missing trie node +} + +// Unwrap returns the concrete error for the missing trie node, which +// allows for further analysis by callers. +func (err *MissingNodeError) Unwrap() error { + return err.err +} + +// Error formats the node hash, path and wrapped error; the owner is included +// only when it is not the zero hash. +func (err *MissingNodeError) Error() string { + if err.Owner == (common.Hash{}) { + return fmt.Sprintf("missing trie node %x (path %x) %v", err.NodeHash, err.Path, err.err) + } + return fmt.Sprintf("missing trie node %x (owner %x) (path %x) %v", err.NodeHash, err.Owner, err.Path, err.err) +} diff --git a/bycid/trie/hasher.go b/bycid/trie/hasher.go new file mode 100644 index 0000000..caa80f0 --- /dev/null +++ b/bycid/trie/hasher.go @@ -0,0 +1,210 @@ +// Copyright 2016 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "sync" + + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "golang.org/x/crypto/sha3" +) + +// hasher is a type used for the trie Hash operation. A hasher has some +// internal preallocated temp space +type hasher struct { + sha crypto.KeccakState + tmp []byte + encbuf rlp.EncoderBuffer + parallel bool // Whether to use parallel threads when hashing +} + +// hasherPool holds pureHashers +var hasherPool = sync.Pool{ + New: func() interface{} { + return &hasher{ + tmp: make([]byte, 0, 550), // cap is as large as a full fullNode. + sha: sha3.NewLegacyKeccak256().(crypto.KeccakState), + encbuf: rlp.NewEncoderBuffer(nil), + } + }, +} + +func newHasher(parallel bool) *hasher { + h := hasherPool.Get().(*hasher) + h.parallel = parallel + return h +} + +func returnHasherToPool(h *hasher) { + hasherPool.Put(h) +} + +// hash collapses a node down into a hash node, also returning a copy of the +// original node initialized with the computed hash to replace the original one. 
+func (h *hasher) hash(n node, force bool) (hashed node, cached node) { + // Return the cached hash if it's available + if hash, _ := n.cache(); hash != nil { + return hash, n + } + // Trie not processed yet, walk the children + switch n := n.(type) { + case *shortNode: + collapsed, cached := h.hashShortNodeChildren(n) + hashed := h.shortnodeToHash(collapsed, force) + // We need to retain the possibly _not_ hashed node, in case it was too + // small to be hashed + if hn, ok := hashed.(hashNode); ok { + cached.flags.hash = hn + } else { + cached.flags.hash = nil + } + return hashed, cached + case *fullNode: + collapsed, cached := h.hashFullNodeChildren(n) + hashed = h.fullnodeToHash(collapsed, force) + if hn, ok := hashed.(hashNode); ok { + cached.flags.hash = hn + } else { + cached.flags.hash = nil + } + return hashed, cached + default: + // Value and hash nodes don't have children so they're left as were + return n, n + } +} + +// hashShortNodeChildren collapses the short node. The returned collapsed node +// holds a live reference to the Key, and must not be modified. +// The cached +func (h *hasher) hashShortNodeChildren(n *shortNode) (collapsed, cached *shortNode) { + // Hash the short node's child, caching the newly hashed subtree + collapsed, cached = n.copy(), n.copy() + // Previously, we did copy this one. 
We don't seem to need to actually + // do that, since we don't overwrite/reuse keys + //cached.Key = common.CopyBytes(n.Key) + collapsed.Key = trie.HexToCompact(n.Key) + // Unless the child is a valuenode or hashnode, hash it + switch n.Val.(type) { + case *fullNode, *shortNode: + collapsed.Val, cached.Val = h.hash(n.Val, false) + } + return collapsed, cached +} + +func (h *hasher) hashFullNodeChildren(n *fullNode) (collapsed *fullNode, cached *fullNode) { + // Hash the full node's children, caching the newly hashed subtrees + cached = n.copy() + collapsed = n.copy() + if h.parallel { + var wg sync.WaitGroup + wg.Add(16) + for i := 0; i < 16; i++ { + go func(i int) { + hasher := newHasher(false) + if child := n.Children[i]; child != nil { + collapsed.Children[i], cached.Children[i] = hasher.hash(child, false) + } else { + collapsed.Children[i] = nilValueNode + } + returnHasherToPool(hasher) + wg.Done() + }(i) + } + wg.Wait() + } else { + for i := 0; i < 16; i++ { + if child := n.Children[i]; child != nil { + collapsed.Children[i], cached.Children[i] = h.hash(child, false) + } else { + collapsed.Children[i] = nilValueNode + } + } + } + return collapsed, cached +} + +// shortnodeToHash creates a hashNode from a shortNode. The supplied shortnode +// should have hex-type Key, which will be converted (without modification) +// into compact form for RLP encoding. +// If the rlp data is smaller than 32 bytes, `nil` is returned. 
+func (h *hasher) shortnodeToHash(n *shortNode, force bool) node { + n.encode(h.encbuf) + enc := h.encodedBytes() + + if len(enc) < 32 && !force { + return n // Nodes smaller than 32 bytes are stored inside their parent + } + return h.hashData(enc) +} + +// shortnodeToHash is used to creates a hashNode from a set of hashNodes, (which +// may contain nil values) +func (h *hasher) fullnodeToHash(n *fullNode, force bool) node { + n.encode(h.encbuf) + enc := h.encodedBytes() + + if len(enc) < 32 && !force { + return n // Nodes smaller than 32 bytes are stored inside their parent + } + return h.hashData(enc) +} + +// encodedBytes returns the result of the last encoding operation on h.encbuf. +// This also resets the encoder buffer. +// +// All node encoding must be done like this: +// +// node.encode(h.encbuf) +// enc := h.encodedBytes() +// +// This convention exists because node.encode can only be inlined/escape-analyzed when +// called on a concrete receiver type. +func (h *hasher) encodedBytes() []byte { + h.tmp = h.encbuf.AppendToBytes(h.tmp[:0]) + h.encbuf.Reset(nil) + return h.tmp +} + +// hashData hashes the provided data +func (h *hasher) hashData(data []byte) hashNode { + n := make(hashNode, 32) + h.sha.Reset() + h.sha.Write(data) + h.sha.Read(n) + return n +} + +// proofHash is used to construct trie proofs, and returns the 'collapsed' +// node (for later RLP encoding) as well as the hashed node -- unless the +// node is smaller than 32 bytes, in which case it will be returned as is. +// This method does not do anything on value- or hash-nodes. 
+func (h *hasher) proofHash(original node) (collapsed, hashed node) { + switch n := original.(type) { + case *shortNode: + sn, _ := h.hashShortNodeChildren(n) + return sn, h.shortnodeToHash(sn, false) + case *fullNode: + fn, _ := h.hashFullNodeChildren(n) + return fn, h.fullnodeToHash(fn, false) + default: + // Value and hash nodes don't have children so they're left as were + return n, n + } +} diff --git a/bycid/trie/node.go b/bycid/trie/node.go new file mode 100644 index 0000000..c995099 --- /dev/null +++ b/bycid/trie/node.go @@ -0,0 +1,242 @@ +// Copyright 2014 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package trie + +import ( + "fmt" + "io" + "strings" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"} + +type node interface { + cache() (hashNode, bool) + encode(w rlp.EncoderBuffer) + fstring(string) string +} + +type ( + fullNode struct { + Children [17]node // Actual trie node data to encode/decode (needs custom encoder) + flags nodeFlag + } + shortNode struct { + Key []byte + Val node + flags nodeFlag + } + hashNode []byte + valueNode []byte +) + +// nilValueNode is used when collapsing internal trie nodes for hashing, since +// unset children need to serialize correctly. +var nilValueNode = valueNode(nil) + +// EncodeRLP encodes a full node into the consensus RLP format. +func (n *fullNode) EncodeRLP(w io.Writer) error { + eb := rlp.NewEncoderBuffer(w) + n.encode(eb) + return eb.Flush() +} + +func (n *fullNode) copy() *fullNode { copy := *n; return © } +func (n *shortNode) copy() *shortNode { copy := *n; return © } + +// nodeFlag contains caching-related metadata about a node. +type nodeFlag struct { + hash hashNode // cached hash of the node (may be nil) + dirty bool // whether the node has changes that must be written to the database +} + +func (n *fullNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty } +func (n *shortNode) cache() (hashNode, bool) { return n.flags.hash, n.flags.dirty } +func (n hashNode) cache() (hashNode, bool) { return nil, true } +func (n valueNode) cache() (hashNode, bool) { return nil, true } + +// Pretty printing. 
+func (n *fullNode) String() string { return n.fstring("") } +func (n *shortNode) String() string { return n.fstring("") } +func (n hashNode) String() string { return n.fstring("") } +func (n valueNode) String() string { return n.fstring("") } + +func (n *fullNode) fstring(ind string) string { + resp := fmt.Sprintf("[\n%s ", ind) + for i, node := range &n.Children { + if node == nil { + resp += fmt.Sprintf("%s: ", indices[i]) + } else { + resp += fmt.Sprintf("%s: %v", indices[i], node.fstring(ind+" ")) + } + } + return resp + fmt.Sprintf("\n%s] ", ind) +} +func (n *shortNode) fstring(ind string) string { + return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" ")) +} +func (n hashNode) fstring(ind string) string { + return fmt.Sprintf("<%x> ", []byte(n)) +} +func (n valueNode) fstring(ind string) string { + return fmt.Sprintf("%x ", []byte(n)) +} + +// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered. +func mustDecodeNode(hash, buf []byte) node { + n, err := decodeNode(hash, buf) + if err != nil { + panic(fmt.Sprintf("node %x: %v", hash, err)) + } + return n +} + +// mustDecodeNodeUnsafe is a wrapper of decodeNodeUnsafe and panic if any error is +// encountered. +func mustDecodeNodeUnsafe(hash, buf []byte) node { + n, err := decodeNodeUnsafe(hash, buf) + if err != nil { + panic(fmt.Sprintf("node %x: %v", hash, err)) + } + return n +} + +// decodeNode parses the RLP encoding of a trie node. It will deep-copy the passed +// byte slice for decoding, so it's safe to modify the byte slice afterwards. The- +// decode performance of this function is not optimal, but it is suitable for most +// scenarios with low performance requirements and hard to determine whether the +// byte slice be modified or not. +func decodeNode(hash, buf []byte) (node, error) { + return decodeNodeUnsafe(hash, common.CopyBytes(buf)) +} + +// decodeNodeUnsafe parses the RLP encoding of a trie node. 
The passed byte slice +// will be directly referenced by node without bytes deep copy, so the input MUST +// not be changed after. +func decodeNodeUnsafe(hash, buf []byte) (node, error) { + if len(buf) == 0 { + return nil, io.ErrUnexpectedEOF + } + elems, _, err := rlp.SplitList(buf) + if err != nil { + return nil, fmt.Errorf("decode error: %v", err) + } + switch c, _ := rlp.CountValues(elems); c { + case 2: + n, err := decodeShort(hash, elems) + return n, wrapError(err, "short") + case 17: + n, err := decodeFull(hash, elems) + return n, wrapError(err, "full") + default: + return nil, fmt.Errorf("invalid number of list elements: %v", c) + } +} + +func decodeShort(hash, elems []byte) (node, error) { + kbuf, rest, err := rlp.SplitString(elems) + if err != nil { + return nil, err + } + flag := nodeFlag{hash: hash} + key := trie.CompactToHex(kbuf) + if hasTerm(key) { + // value node + val, _, err := rlp.SplitString(rest) + if err != nil { + return nil, fmt.Errorf("invalid value node: %v", err) + } + return &shortNode{key, valueNode(val), flag}, nil + } + r, _, err := decodeRef(rest) + if err != nil { + return nil, wrapError(err, "val") + } + return &shortNode{key, r, flag}, nil +} + +func decodeFull(hash, elems []byte) (*fullNode, error) { + n := &fullNode{flags: nodeFlag{hash: hash}} + for i := 0; i < 16; i++ { + cld, rest, err := decodeRef(elems) + if err != nil { + return n, wrapError(err, fmt.Sprintf("[%d]", i)) + } + n.Children[i], elems = cld, rest + } + val, _, err := rlp.SplitString(elems) + if err != nil { + return n, err + } + if len(val) > 0 { + n.Children[16] = valueNode(val) + } + return n, nil +} + +const hashLen = len(common.Hash{}) + +func decodeRef(buf []byte) (node, []byte, error) { + kind, val, rest, err := rlp.Split(buf) + if err != nil { + return nil, buf, err + } + switch { + case kind == rlp.List: + // 'embedded' node reference. The encoding must be smaller + // than a hash in order to be valid. 
+ if size := len(buf) - len(rest); size > hashLen { + err := fmt.Errorf("oversized embedded node (size is %d bytes, want size < %d)", size, hashLen) + return nil, buf, err + } + n, err := decodeNode(nil, buf) + return n, rest, err + case kind == rlp.String && len(val) == 0: + // empty node + return nil, rest, nil + case kind == rlp.String && len(val) == 32: + return hashNode(val), rest, nil + default: + return nil, nil, fmt.Errorf("invalid RLP string size %d (want 0 or 32)", len(val)) + } +} + +// wraps a decoding error with information about the path to the +// invalid child node (for debugging encoding issues). +type decodeError struct { + what error + stack []string +} + +func wrapError(err error, ctx string) error { + if err == nil { + return nil + } + if decErr, ok := err.(*decodeError); ok { + decErr.stack = append(decErr.stack, ctx) + return decErr + } + return &decodeError{err, []string{ctx}} +} + +func (err *decodeError) Error() string { + return fmt.Sprintf("%v (decode path: %s)", err.what, strings.Join(err.stack, "<-")) +} diff --git a/bycid/trie/node_enc.go b/bycid/trie/node_enc.go new file mode 100644 index 0000000..2d26350 --- /dev/null +++ b/bycid/trie/node_enc.go @@ -0,0 +1,60 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package trie + +import ( + "github.com/ethereum/go-ethereum/rlp" +) + +func nodeToBytes(n node) []byte { + w := rlp.NewEncoderBuffer(nil) + n.encode(w) + result := w.ToBytes() + w.Flush() + return result +} + +func (n *fullNode) encode(w rlp.EncoderBuffer) { + offset := w.List() + for _, c := range n.Children { + if c != nil { + c.encode(w) + } else { + w.Write(rlp.EmptyString) + } + } + w.ListEnd(offset) +} + +func (n *shortNode) encode(w rlp.EncoderBuffer) { + offset := w.List() + w.WriteBytes(n.Key) + if n.Val != nil { + n.Val.encode(w) + } else { + w.Write(rlp.EmptyString) + } + w.ListEnd(offset) +} + +func (n hashNode) encode(w rlp.EncoderBuffer) { + w.WriteBytes(n) +} + +func (n valueNode) encode(w rlp.EncoderBuffer) { + w.WriteBytes(n) +} diff --git a/bycid/trie/node_test.go b/bycid/trie/node_test.go new file mode 100644 index 0000000..ac1d8fb --- /dev/null +++ b/bycid/trie/node_test.go @@ -0,0 +1,94 @@ +// Copyright 2016 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package trie + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/rlp" +) + +func newTestFullNode(v []byte) []interface{} { + fullNodeData := []interface{}{} + for i := 0; i < 16; i++ { + k := bytes.Repeat([]byte{byte(i + 1)}, 32) + fullNodeData = append(fullNodeData, k) + } + fullNodeData = append(fullNodeData, v) + return fullNodeData +} + +func TestDecodeNestedNode(t *testing.T) { + fullNodeData := newTestFullNode([]byte("fullnode")) + + data := [][]byte{} + for i := 0; i < 16; i++ { + data = append(data, nil) + } + data = append(data, []byte("subnode")) + fullNodeData[15] = data + + buf := bytes.NewBuffer([]byte{}) + rlp.Encode(buf, fullNodeData) + + if _, err := decodeNode([]byte("testdecode"), buf.Bytes()); err != nil { + t.Fatalf("decode nested full node err: %v", err) + } +} + +func TestDecodeFullNodeWrongSizeChild(t *testing.T) { + fullNodeData := newTestFullNode([]byte("wrongsizechild")) + fullNodeData[0] = []byte("00") + buf := bytes.NewBuffer([]byte{}) + rlp.Encode(buf, fullNodeData) + + _, err := decodeNode([]byte("testdecode"), buf.Bytes()) + if _, ok := err.(*decodeError); !ok { + t.Fatalf("decodeNode returned wrong err: %v", err) + } +} + +func TestDecodeFullNodeWrongNestedFullNode(t *testing.T) { + fullNodeData := newTestFullNode([]byte("fullnode")) + + data := [][]byte{} + for i := 0; i < 16; i++ { + data = append(data, []byte("123456")) + } + data = append(data, []byte("subnode")) + fullNodeData[15] = data + + buf := bytes.NewBuffer([]byte{}) + rlp.Encode(buf, fullNodeData) + + _, err := decodeNode([]byte("testdecode"), buf.Bytes()) + if _, ok := err.(*decodeError); !ok { + t.Fatalf("decodeNode returned wrong err: %v", err) + } +} + +func TestDecodeFullNode(t *testing.T) { + fullNodeData := newTestFullNode([]byte("decodefullnode")) + buf := bytes.NewBuffer([]byte{}) + rlp.Encode(buf, fullNodeData) + + _, err := decodeNode([]byte("testdecode"), buf.Bytes()) + if err != nil { + t.Fatalf("decode full node err: %v", err) + } +} 
diff --git a/bycid/trie/proof.go b/bycid/trie/proof.go new file mode 100644 index 0000000..0a4eea9 --- /dev/null +++ b/bycid/trie/proof.go @@ -0,0 +1,475 @@ +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "bytes" + "errors" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// Prove constructs a merkle proof for key. The result contains all encoded nodes +// on the path to the value at key. The value itself is also included in the last +// node and can be retrieved by verifying the proof. +// +// If the trie does not contain a value for key, the returned proof contains all +// nodes of the longest existing prefix of the key (at least the root node), ending +// with the node that proves the absence of the key. +func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) error { + // Collect all nodes on the path to key. + var ( + prefix []byte + nodes []node + tn = t.root + ) + key = keybytesToHex(key) + for len(key) > 0 && tn != nil { + switch n := tn.(type) { + case *shortNode: + if len(key) < len(n.Key) || !bytes.Equal(n.Key, key[:len(n.Key)]) { + // The trie doesn't contain the key. 
+ tn = nil + } else { + tn = n.Val + prefix = append(prefix, n.Key...) + key = key[len(n.Key):] + } + nodes = append(nodes, n) + case *fullNode: + tn = n.Children[key[0]] + prefix = append(prefix, key[0]) + key = key[1:] + nodes = append(nodes, n) + case hashNode: + var err error + tn, err = t.resolveHash(n, prefix) + if err != nil { + log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) + return err + } + default: + panic(fmt.Sprintf("%T: invalid node: %v", tn, tn)) + } + } + hasher := newHasher(false) + defer returnHasherToPool(hasher) + + for i, n := range nodes { + if fromLevel > 0 { + fromLevel-- + continue + } + var hn node + n, hn = hasher.proofHash(n) + if hash, ok := hn.(hashNode); ok || i == 0 { + // If the node's database encoding is a hash (or is the + // root node), it becomes a proof element. + enc := nodeToBytes(n) + if !ok { + hash = hasher.hashData(enc) + } + proofDb.Put(hash, enc) + } + } + return nil +} + +// Prove constructs a merkle proof for key. The result contains all encoded nodes +// on the path to the value at key. The value itself is also included in the last +// node and can be retrieved by verifying the proof. +// +// If the trie does not contain a value for key, the returned proof contains all +// nodes of the longest existing prefix of the key (at least the root node), ending +// with the node that proves the absence of the key. +func (t *StateTrie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) error { + return t.trie.Prove(key, fromLevel, proofDb) +} + +// VerifyProof checks merkle proofs. The given proof must contain the value for +// key in a trie with the given root hash. VerifyProof returns an error if the +// proof contains invalid trie nodes or the wrong value. 
func VerifyProof(rootHash common.Hash, key []byte, proofDb ethdb.KeyValueReader) (value []byte, err error) {
	key = keybytesToHex(key)
	wantHash := rootHash
	for i := 0; ; i++ {
		// The lookup error is ignored; an absent entry shows up as a nil
		// buffer and is reported as a missing proof node.
		buf, _ := proofDb.Get(wantHash[:])
		if buf == nil {
			return nil, fmt.Errorf("proof node %d (hash %064x) missing", i, wantHash)
		}
		n, err := decodeNode(wantHash[:], buf)
		if err != nil {
			return nil, fmt.Errorf("bad proof node %d: %v", i, err)
		}
		// Walk through the decoded node (and any embedded nodes) until a
		// hash reference, a value, or a dead end is reached.
		keyrest, cld := get(n, key, true)
		switch cld := cld.(type) {
		case nil:
			// The trie doesn't contain the key.
			return nil, nil
		case hashNode:
			// Continue with the referenced node in the next iteration.
			key = keyrest
			copy(wantHash[:], cld)
		case valueNode:
			return cld, nil
		}
	}
}

// proofToPath converts a merkle proof to trie node path. The main purpose of
// this function is recovering a node path from the merkle proof stream. All
// necessary nodes will be resolved and leave the remaining as hashnode.
//
// The given edge proof is allowed to be an existent or non-existent proof.
//
// It returns the (possibly newly resolved) root and the value found at key.
// When the key is absent, the value is nil and the result is an error unless
// allowNonExistent is set.
func proofToPath(rootHash common.Hash, root node, key []byte, proofDb ethdb.KeyValueReader, allowNonExistent bool) (node, []byte, error) {
	// resolveNode retrieves and resolves trie node from merkle proof stream
	resolveNode := func(hash common.Hash) (node, error) {
		buf, _ := proofDb.Get(hash[:])
		if buf == nil {
			return nil, fmt.Errorf("proof node (hash %064x) missing", hash)
		}
		n, err := decodeNode(hash[:], buf)
		if err != nil {
			return nil, fmt.Errorf("bad proof node %v", err)
		}
		return n, err
	}
	// If the root node is empty, resolve it first.
	// Root node must be included in the proof.
	if root == nil {
		n, err := resolveNode(rootHash)
		if err != nil {
			return nil, nil, err
		}
		root = n
	}
	var (
		err           error
		child, parent node
		keyrest       []byte
		valnode       []byte
	)
	key, parent = keybytesToHex(key), root
	for {
		// Take a single step from parent along key.
		keyrest, child = get(parent, key, false)
		switch cld := child.(type) {
		case nil:
			// The trie doesn't contain the key. It's possible
			// the proof is a non-existing proof, but at least
			// we can prove all resolved nodes are correct, it's
			// enough for us to prove range.
			if allowNonExistent {
				return root, nil, nil
			}
			return nil, nil, errors.New("the node is not contained in trie")
		case *shortNode:
			key, parent = keyrest, child // Already resolved
			continue
		case *fullNode:
			key, parent = keyrest, child // Already resolved
			continue
		case hashNode:
			child, err = resolveNode(common.BytesToHash(cld))
			if err != nil {
				return nil, nil, err
			}
		case valueNode:
			valnode = cld
		}
		// Link the parent and child.
		switch pnode := parent.(type) {
		case *shortNode:
			pnode.Val = child
		case *fullNode:
			pnode.Children[key[0]] = child
		default:
			panic(fmt.Sprintf("%T: invalid node: %v", pnode, pnode))
		}
		if len(valnode) > 0 {
			return root, valnode, nil // The whole path is resolved
		}
		key, parent = keyrest, child
	}
}

// unsetInternal removes all internal node references(hashnode, embedded node).
// It should be called after a trie is constructed with two edge paths. Also
// the given boundary keys must be the one used to construct the edge paths.
//
// It's the key step for range proof. All visited nodes should be marked dirty
// since the node content might be modified. Besides it can happen that some
// fullnodes only have one child which is disallowed. But if the proof is valid,
// the missing children will be filled, otherwise it will be thrown anyway.
//
// Note we have the assumption here the given boundary keys are different
// and right is larger than left.
//
// The boolean result is true when the fork point is the root node and the
// entire trie is to be unset; in that case nothing is modified below the root.
func unsetInternal(n node, left []byte, right []byte) (bool, error) {
	left, right = keybytesToHex(left), keybytesToHex(right)

	// Step down to the fork point. There are two scenarios can happen:
	// - the fork point is a shortnode: either the key of left proof or
	//   right proof doesn't match with shortnode's key.
	// - the fork point is a fullnode: both two edge proofs are allowed
	//   to point to a non-existent key.
	var (
		pos    = 0
		parent node

		// fork indicator, 0 means no fork, -1 means proof is less, 1 means proof is greater
		shortForkLeft, shortForkRight int
	)
findFork:
	for {
		switch rn := (n).(type) {
		case *shortNode:
			rn.flags = nodeFlag{dirty: true}

			// If either the key of left proof or right proof doesn't match with
			// shortnode, stop here and the forkpoint is the shortnode.
			if len(left)-pos < len(rn.Key) {
				shortForkLeft = bytes.Compare(left[pos:], rn.Key)
			} else {
				shortForkLeft = bytes.Compare(left[pos:pos+len(rn.Key)], rn.Key)
			}
			if len(right)-pos < len(rn.Key) {
				shortForkRight = bytes.Compare(right[pos:], rn.Key)
			} else {
				shortForkRight = bytes.Compare(right[pos:pos+len(rn.Key)], rn.Key)
			}
			if shortForkLeft != 0 || shortForkRight != 0 {
				break findFork
			}
			parent = n
			n, pos = rn.Val, pos+len(rn.Key)
		case *fullNode:
			rn.flags = nodeFlag{dirty: true}

			// If either the node pointed by left proof or right proof is nil,
			// stop here and the forkpoint is the fullnode.
			leftnode, rightnode := rn.Children[left[pos]], rn.Children[right[pos]]
			if leftnode == nil || rightnode == nil || leftnode != rightnode {
				break findFork
			}
			parent = n
			n, pos = rn.Children[left[pos]], pos+1
		default:
			panic(fmt.Sprintf("%T: invalid node: %v", n, n))
		}
	}
	switch rn := n.(type) {
	case *shortNode:
		// There can have these five scenarios:
		// - both proofs are less than the trie path => no valid range
		// - both proofs are greater than the trie path => no valid range
		// - left proof is less and right proof is greater => valid range, unset the shortnode entirely
		// - left proof points to the shortnode, but right proof is greater
		// - right proof points to the shortnode, but left proof is less
		if shortForkLeft == -1 && shortForkRight == -1 {
			return false, errors.New("empty range")
		}
		if shortForkLeft == 1 && shortForkRight == 1 {
			return false, errors.New("empty range")
		}
		if shortForkLeft != 0 && shortForkRight != 0 {
			// The fork point is root node, unset the entire trie
			if parent == nil {
				return true, nil
			}
			parent.(*fullNode).Children[left[pos-1]] = nil
			return false, nil
		}
		// Only one proof points to non-existent key.
		if shortForkRight != 0 {
			if _, ok := rn.Val.(valueNode); ok {
				// The fork point is root node, unset the entire trie
				if parent == nil {
					return true, nil
				}
				parent.(*fullNode).Children[left[pos-1]] = nil
				return false, nil
			}
			return false, unset(rn, rn.Val, left[pos:], len(rn.Key), false)
		}
		if shortForkLeft != 0 {
			if _, ok := rn.Val.(valueNode); ok {
				// The fork point is root node, unset the entire trie
				if parent == nil {
					return true, nil
				}
				parent.(*fullNode).Children[right[pos-1]] = nil
				return false, nil
			}
			return false, unset(rn, rn.Val, right[pos:], len(rn.Key), true)
		}
		return false, nil
	case *fullNode:
		// unset all internal nodes in the forkpoint
		for i := left[pos] + 1; i < right[pos]; i++ {
			rn.Children[i] = nil
		}
		// Trim the right side of the left edge path and the left side of the
		// right edge path.
		if err := unset(rn, rn.Children[left[pos]], left[pos:], 1, false); err != nil {
			return false, err
		}
		if err := unset(rn, rn.Children[right[pos]], right[pos:], 1, true); err != nil {
			return false, err
		}
		return false, nil
	default:
		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
	}
}

// unset removes all internal node references either the left most or right most.
// It can meet these scenarios:
//
//   - The given path is existent in the trie, unset the associated nodes with the
//     specific direction
//   - The given path is non-existent in the trie
//   - the fork point is a fullnode, the corresponding child pointed by path
//     is nil, return
//   - the fork point is a shortnode, the shortnode is included in the range,
//     keep the entire branch and return.
//   - the fork point is a shortnode, the shortnode is excluded in the range,
//     unset the entire branch.
func unset(parent node, child node, key []byte, pos int, removeLeft bool) error {
	switch cld := child.(type) {
	case *fullNode:
		if removeLeft {
			// Drop every child below the path's nibble.
			for i := 0; i < int(key[pos]); i++ {
				cld.Children[i] = nil
			}
			cld.flags = nodeFlag{dirty: true}
		} else {
			// Drop every child above the path's nibble (value slot 16 is kept).
			for i := key[pos] + 1; i < 16; i++ {
				cld.Children[i] = nil
			}
			cld.flags = nodeFlag{dirty: true}
		}
		return unset(cld, cld.Children[key[pos]], key, pos+1, removeLeft)
	case *shortNode:
		if len(key[pos:]) < len(cld.Key) || !bytes.Equal(cld.Key, key[pos:pos+len(cld.Key)]) {
			// Find the fork point, it's a non-existent branch.
			if removeLeft {
				if bytes.Compare(cld.Key, key[pos:]) < 0 {
					// The key of fork shortnode is less than the path
					// (it belongs to the range), unset the entire
					// branch. The parent must be a fullnode.
					fn := parent.(*fullNode)
					fn.Children[key[pos-1]] = nil
				}
				//else {
				// The key of fork shortnode is greater than the
				// path(it doesn't belong to the range), keep
				// it with the cached hash available.
				//}
			} else {
				if bytes.Compare(cld.Key, key[pos:]) > 0 {
					// The key of fork shortnode is greater than the
					// path(it belongs to the range), unset the entire
					// branch. The parent must be a fullnode.
					fn := parent.(*fullNode)
					fn.Children[key[pos-1]] = nil
				}
				//else {
				// The key of fork shortnode is less than the
				// path(it doesn't belong to the range), keep
				// it with the cached hash available.
				//}
			}
			return nil
		}
		// The path ends in a value directly below this short node: remove the
		// short node from its parent, which is asserted to be a fullnode.
		if _, ok := cld.Val.(valueNode); ok {
			fn := parent.(*fullNode)
			fn.Children[key[pos-1]] = nil
			return nil
		}
		cld.flags = nodeFlag{dirty: true}
		return unset(cld, cld.Val, key, pos+len(cld.Key), removeLeft)
	case nil:
		// If the node is nil, then it's a child of the fork point
		// fullnode(it's a non-existent branch).
		return nil
	default:
		panic("it shouldn't happen") // hashNode, valueNode
	}
}

// hasRightElement returns the indicator whether there exists more elements
// on the right side of the given path. The given path can point to an existent
// key or a non-existent one. This function has the assumption that the whole
// path should already be resolved; it panics when it meets any other node type
// (such as an unresolved hashnode).
func hasRightElement(node node, key []byte) bool {
	pos, key := 0, keybytesToHex(key)
	for node != nil {
		switch rn := node.(type) {
		case *fullNode:
			// Any populated child slot above the path's nibble is an element
			// to the right.
			for i := key[pos] + 1; i < 16; i++ {
				if rn.Children[i] != nil {
					return true
				}
			}
			node, pos = rn.Children[key[pos]], pos+1
		case *shortNode:
			if len(key)-pos < len(rn.Key) || !bytes.Equal(rn.Key, key[pos:pos+len(rn.Key)]) {
				// The path forks here; the short node lies to the right only
				// if its key sorts after the remaining path.
				return bytes.Compare(rn.Key, key[pos:]) > 0
			}
			node, pos = rn.Val, pos+len(rn.Key)
		case valueNode:
			return false // We have resolved the whole path
		default:
			panic(fmt.Sprintf("%T: invalid node: %v", node, node)) // hashnode
		}
	}
	return false
}

// get returns the child of the given node. Return nil if the
// node with specified key doesn't exist at all.
//
// There is an additional flag `skipResolved`. If it's set then
// all resolved nodes won't be returned.
+func get(tn node, key []byte, skipResolved bool) ([]byte, node) {
+	for {
+		switch n := tn.(type) {
+		case *shortNode:
+			if len(key) < len(n.Key) || !bytes.Equal(n.Key, key[:len(n.Key)]) {
+				return nil, nil // short node key does not match the path
+			}
+			tn = n.Val
+			key = key[len(n.Key):]
+			if !skipResolved {
+				return key, tn
+			}
+		case *fullNode:
+			tn = n.Children[key[0]] // NOTE(review): assumes key is non-empty here; confirm callers guarantee it
+			key = key[1:]
+			if !skipResolved {
+				return key, tn
+			}
+		case hashNode:
+			return key, n // unresolved node: returned as-is with the key left to consume
+		case nil:
+			return key, nil
+		case valueNode:
+			return nil, n
+		default:
+			panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
+		}
+	}
+}
diff --git a/bycid/trie/secure_trie.go b/bycid/trie/secure_trie.go
new file mode 100644
index 0000000..197d526
--- /dev/null
+++ b/bycid/trie/secure_trie.go
@@ -0,0 +1,114 @@
+// Copyright 2015 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
+)
+
+// StateTrie wraps a trie with key hashing. In a secure trie, all
+// access operations hash the key using keccak256. This prevents
+// calling code from creating long chains of nodes that
+// increase the access time.
+//
+// Contrary to a regular trie, a StateTrie can only be created with
+// NewStateTrie or NewStorageTrie and must have an attached database.
+//
+// StateTrie is not safe for concurrent use.
+type StateTrie struct {
+	trie       Trie                    // underlying trie, queried with hashed keys
+	hashKeyBuf [common.HashLength]byte // scratch buffer that hashKey writes into and returns
+}
+
+// NewStateTrie creates a StateTrie with an existing root node, reading its
+// nodes from the backing database db.
+//
+// It panics if db is nil. If root is the zero hash or the known empty-trie
+// root hash, the trie is initially empty. Otherwise the root node is
+// resolved immediately and any error reported by New is returned.
+//
+// owner is passed through to New, which records it on the trie; it is
+// reported in the MissingNodeError produced when a node cannot be found.
+// Nodes below the root are loaded from db on demand as keys are looked up.
+// StateTrie only offers read access: TryGet, TryGetAccount and Hash.
+//
+// Retrieves IPLD blocks by CID encoded as "eth-state-trie"
+func NewStateTrie(owner common.Hash, root common.Hash, db *Database) (*StateTrie, error) {
+	return newStateTrie(owner, root, db, ipld.MEthStateTrie)
+}
+
+// NewStorageTrie is identical to NewStateTrie, but retrieves IPLD blocks encoded
+// as "eth-storage-trie"
+func NewStorageTrie(owner common.Hash, root common.Hash, db *Database) (*StateTrie, error) {
+	return newStateTrie(owner, root, db, ipld.MEthStorageTrie)
+}
+
+func newStateTrie(owner common.Hash, root common.Hash, db *Database, codec uint64) (*StateTrie, error) {
+	if db == nil { // checked up front, whatever the value of root
+		panic("NewStateTrie called without a database")
+	}
+	trie, err := New(owner, root, db, codec)
+	if err != nil {
+		return nil, err
+	}
+	return &StateTrie{trie: *trie}, nil // the Trie value is copied into the wrapper
+}
+
+// TryGet returns the value for key stored in the trie.
+// The value bytes must not be modified by the caller.
+// If a node was not found in the database, a MissingNodeError is returned.
+func (t *StateTrie) TryGet(key []byte) ([]byte, error) {
+	return t.trie.TryGet(t.hashKey(key))
+}
+
+// TryGetAccount returns the account stored under key, or (nil, nil) if absent.
+// Lookup failures are returned (wrapped with %w) instead of causing a panic.
+func (t *StateTrie) TryGetAccount(key []byte) (*types.StateAccount, error) {
+	var ret types.StateAccount
+	res, err := t.TryGet(key)
+	if err != nil {
+		return nil, fmt.Errorf("unhandled trie error: %w", err)
+	}
+	if res == nil {
+		return nil, nil
+	}
+	err = rlp.DecodeBytes(res, &ret)
+	return &ret, err
+}
+
+// Hash returns the root hash of StateTrie. It does not write to the
+// database and can be used even if the trie doesn't have one.
+func (t *StateTrie) Hash() common.Hash {
+	return t.trie.Hash()
+}
+
+// hashKey returns the hash of key as an ephemeral buffer.
+// The caller must not hold onto the return value because it will become
+// invalid on the next call to hashKey, which reuses the same buffer.
+func (t *StateTrie) hashKey(key []byte) []byte {
+	h := newHasher(false)
+	h.sha.Reset()
+	h.sha.Write(key)
+	h.sha.Read(t.hashKeyBuf[:]) // the digest is read into the shared buffer
+	returnHasherToPool(h)
+	return t.hashKeyBuf[:]
+}
diff --git a/bycid/trie/trie.go b/bycid/trie/trie.go
new file mode 100644
index 0000000..4ee793d
--- /dev/null
+++ b/bycid/trie/trie.go
@@ -0,0 +1,155 @@
+// Package trie implements Merkle Patricia Tries.
+package trie
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
+
+	"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
+)
+
+var (
+	// emptyRoot is the known root hash of an empty trie.
+	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
+
+	// emptyState is the known hash of an empty state trie entry.
+	emptyState = crypto.Keccak256Hash(nil)
+)
+
+// Trie is a Merkle Patricia Trie. Use New to create a trie that sits on
+// top of a database. This file only provides lookups (TryGet) and root
+// hashing (Hash): nodes are resolved from the database by CID on demand and
+// the resolved nodes are kept in the in-memory tree rooted at root.
+// This file defines no insert, delete or commit operation.
+//
+// Trie is not safe for concurrent use.
+type Trie struct {
+	root  node
+	owner common.Hash
+
+	// Keep track of the number of leaves which have been inserted since the
+	// last hashing operation. This number will not directly map to the number
+	// of actually unhashed nodes. hashRoot resets it to zero.
+	unhashed int
+
+	// db is the handler trie can retrieve nodes from. It's
+	// only for reading purpose and not available for writing.
+	db *Database
+
+	// codec is passed to ipld.Keccak256ToCid when building node CIDs
+	codec uint64
+}
+
+// New creates a trie with an existing root node from db and an assigned
+// owner, which is reported in MissingNodeError values.
+//
+// If root is the zero hash or emptyRoot, the trie is initially empty and db
+// is not accessed. Otherwise the root node is resolved through db right away
+// (presumably db must then be non-nil) and any resolution error, such as a
+// MissingNodeError, is returned. Further nodes are loaded from db on demand.
+func New(owner common.Hash, root common.Hash, db *Database, codec uint64) (*Trie, error) {
+	trie := &Trie{
+		owner: owner,
+		db:    db,
+		codec: codec,
+	}
+	if root != (common.Hash{}) && root != emptyRoot {
+		rootnode, err := trie.resolveHash(root[:], nil)
+		if err != nil {
+			return nil, err
+		}
+		trie.root = rootnode
+	}
+	return trie, nil
+}
+
+// NewEmpty is a shortcut to create empty tree. It's mostly used in tests.
+func NewEmpty(db *Database) *Trie {
+	tr, _ := New(common.Hash{}, common.Hash{}, db, ipld.MEthStateTrie) // New returns no error for the zero root
+	return tr
+}
+
+// TryGet returns the value for key stored in the trie.
+// The value bytes must not be modified by the caller.
+// If a node was not found in the database, a MissingNodeError is returned.
+func (t *Trie) TryGet(key []byte) ([]byte, error) {
+	value, newroot, didResolve, err := t.tryGet(t.root, keybytesToHex(key), 0)
+	if err == nil && didResolve { // keep the nodes that were resolved during the lookup
+		t.root = newroot
+	}
+	return value, err
+}
+
+func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode node, didResolve bool, err error) {
+	switch n := (origNode).(type) {
+	case nil: // empty subtree: the key is absent
+		return nil, nil, false, nil
+	case valueNode: // reached a value
+		return n, n, false, nil
+	case *shortNode:
+		if len(key)-pos < len(n.Key) || !bytes.Equal(n.Key, key[pos:pos+len(n.Key)]) {
+			// key not found in trie
+			return nil, n, false, nil
+		}
+		value, newnode, didResolve, err = t.tryGet(n.Val, key, pos+len(n.Key))
+		if err == nil && didResolve {
+			n = n.copy() // work on a copy before attaching the resolved child
+			n.Val = newnode
+		}
+		return value, n, didResolve, err
+	case *fullNode:
+		value, newnode, didResolve, err = t.tryGet(n.Children[key[pos]], key, pos+1)
+		if err == nil && didResolve {
+			n = n.copy()
+			n.Children[key[pos]] = newnode
+		}
+		return value, n, didResolve, err
+	case hashNode: // unresolved child: load it, then retry at the same position
+		child, err := t.resolveHash(n, key[:pos])
+		if err != nil {
+			return nil, n, true, err
+		}
+		value, newnode, _, err := t.tryGet(child, key, pos)
+		return value, newnode, true, err
+	default:
+		panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
+	}
+}
+
+// resolveHash loads the node with hash n from the database, keyed by the CID
+// derived from t.codec and n. prefix is only used to fill MissingNodeError.Path.
+func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) {
+	cid := ipld.Keccak256ToCid(t.codec, n)
+	node, err := t.db.node(cid.Bytes())
+	if err != nil {
+		return nil, err
+	}
+	if node != nil {
+		return node, nil
+	}
+	return nil, &MissingNodeError{Owner: t.owner, NodeHash: n, Path: prefix} // no error, but no node either
+}
+
+// Hash returns the root hash of the trie. It does not write to the
+// database and can be used even if the trie doesn't have one.
+func (t *Trie) Hash() common.Hash {
+	hash, cached, _ := t.hashRoot() // hashRoot always returns a nil error
+	t.root = cached
+	return common.BytesToHash(hash.(hashNode))
+}
+
+// hashRoot returns the root hash node and the root node to keep; the error is always nil.
+func (t *Trie) hashRoot() (node, node, error) {
+	if t.root == nil {
+		return hashNode(emptyRoot.Bytes()), nil, nil
+	}
+	// If the number of changes is below 100, we let one thread handle it
+	h := newHasher(t.unhashed >= 100)
+	defer returnHasherToPool(h)
+	hashed, cached := h.hash(t.root, true)
+	t.unhashed = 0
+	return hashed, cached, nil
+}
diff --git a/ipld/util.go b/ipld/util.go
new file mode 100644
index 0000000..6cc93e1
--- /dev/null
+++ b/ipld/util.go
@@ -0,0 +1,36 @@
+package ipld
+
+import (
+	"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
+)
+
+// Constants re-exported from the statediff ipld package.
+const (
+	RawBinary           = ipld.RawBinary
+	MEthHeader          = ipld.MEthHeader
+	MEthHeaderList      = ipld.MEthHeaderList
+	MEthTxTrie          = ipld.MEthTxTrie
+	MEthTx              = ipld.MEthTx
+	MEthTxReceiptTrie   = ipld.MEthTxReceiptTrie
+	MEthTxReceipt       = ipld.MEthTxReceipt
+	MEthStateTrie       = ipld.MEthStateTrie
+	MEthAccountSnapshot = ipld.MEthAccountSnapshot
+	MEthStorageTrie     = ipld.MEthStorageTrie
+	MEthLogTrie         = ipld.MEthLogTrie
+	MEthLog             = ipld.MEthLog
+)
+
+// RawdataToCid is re-exported from the statediff ipld package.
+var RawdataToCid = ipld.RawdataToCid
+
+// var Keccak256ToCid = ipld.Keccak256ToCid
+
+// // Keccak256ToCid converts keccak256 hash bytes into a v1 cid
+// // (non-panicking function)
+// func Keccak256ToCid(hash []byte, codecType uint64) (cid.Cid, error) {
+// 	mh, err := multihash.Encode(hash, multihash.KECCAK_256)
+// 	if err != nil {
+// 		return cid.Cid{}, err
+// 	}
+// 	return cid.NewCidV1(codecType, mh), nil
+// }
diff --git a/pkg/trie_builder_utils/util.go b/pkg/trie_builder_utils/util.go
index ca5e027..c8b0270 100644
--- a/pkg/trie_builder_utils/util.go
+++ b/pkg/trie_builder_utils/util.go
@@ -8,12 +8,12 @@ import (
 	"github.com/ethereum/go-ethereum/crypto"
 )
 
-// BuildAndReportKeySetWithBranchToDepth takes a depth argument
-// and returns the first two slots (that when hashed into trie keys) that intersect at or below that provided depth
-// it hashes the slots and converts to nibbles before finding the intersection
-// it also returns the nibble and hex string representations of the two intersecting keys
-// this is useful for identifying what contract slots need to be occupied to cause branching in the storage trie
-// at or below a provided height
+// BuildAndReportKeySetWithBranchToDepth takes a depth argument and returns
+// the first two slots that (when hashed into trie keys) intersect at or below the provided depth.
+// It then hashes the slots and converts to nibbles before finding their intersection.
+// It also returns the nibble and hex string representations of the two intersecting keys.
+// This is useful for identifying what contract slots need to be occupied to cause branching in the storage trie
+// at or below a provided height.
 func BuildAndReportKeySetWithBranchToDepth(depth int) (string, string, []byte, []byte, string, string) {
 	slots, storageLeafKeys, storageLeafKeyStrs, key1, key2 := buildKeySetWithBranchToDepth(depth)
 	var slot1 string