From fe188030df1bd436fe88792bea0da761098a9faf Mon Sep 17 00:00:00 2001 From: i-norden Date: Thu, 23 Feb 2023 16:10:34 -0600 Subject: [PATCH] port over required internal geth packages --- access_list.go | 120 +++++++++++ journal.go | 253 ++++++++++++++++++++++ state_object.go | 521 +++++++++++++++++++++++++++++++++++++++++++++ trie_prefetcher.go | 339 +++++++++++++++++++++++++++++ 4 files changed, 1233 insertions(+) create mode 100644 access_list.go create mode 100644 journal.go create mode 100644 state_object.go create mode 100644 trie_prefetcher.go diff --git a/access_list.go b/access_list.go new file mode 100644 index 0000000..7aa2579 --- /dev/null +++ b/access_list.go @@ -0,0 +1,120 @@ +package ipld_eth_statedb + +import ( + "github.com/ethereum/go-ethereum/common" +) + +type accessList struct { + addresses map[common.Address]int + slots []map[common.Hash]struct{} +} + +// ContainsAddress returns true if the address is in the access list. +func (al *accessList) ContainsAddress(address common.Address) bool { + _, ok := al.addresses[address] + return ok +} + +// Contains checks if a slot within an account is present in the access list, returning +// separate flags for the presence of the account and the slot respectively. +func (al *accessList) Contains(address common.Address, slot common.Hash) (addressPresent bool, slotPresent bool) { + idx, ok := al.addresses[address] + if !ok { + // no such address (and hence zero slots) + return false, false + } + if idx == -1 { + // address yes, but no slots + return true, false + } + _, slotPresent = al.slots[idx][slot] + return true, slotPresent +} + +// newAccessList creates a new accessList. +func newAccessList() *accessList { + return &accessList{ + addresses: make(map[common.Address]int), + } +} + +// Copy creates an independent copy of an accessList. +func (a *accessList) Copy() *accessList { + cp := newAccessList() + for k, v := range a.addresses { + cp.addresses[k] = v + } + cp.slots = make([]map[common.Hash]struct{}, len(a.slots)) + for i, slotMap := range a.slots { + newSlotmap := make(map[common.Hash]struct{}, len(slotMap)) + for k := range slotMap { + newSlotmap[k] = struct{}{} + } + cp.slots[i] = newSlotmap + } + return cp +} + +// AddAddress adds an address to the access list, and returns 'true' if the operation +// caused a change (addr was not previously in the list). +func (al *accessList) AddAddress(address common.Address) bool { + if _, present := al.addresses[address]; present { + return false + } + al.addresses[address] = -1 + return true +} + +// AddSlot adds the specified (addr, slot) combo to the access list. +// Return values are: +// - address added +// - slot added +// For any 'true' value returned, a corresponding journal entry must be made. +func (al *accessList) AddSlot(address common.Address, slot common.Hash) (addrChange bool, slotChange bool) { + idx, addrPresent := al.addresses[address] + if !addrPresent || idx == -1 { + // Address not present, or addr present but no slots there + al.addresses[address] = len(al.slots) + slotmap := map[common.Hash]struct{}{slot: {}} + al.slots = append(al.slots, slotmap) + return !addrPresent, true + } + // There is already an (address,slot) mapping + slotmap := al.slots[idx] + if _, ok := slotmap[slot]; !ok { + slotmap[slot] = struct{}{} + // Journal add slot change + return false, true + } + // No changes required + return false, false +} + +// DeleteSlot removes an (address, slot)-tuple from the access list. +// This operation needs to be performed in the same order as the addition happened. +// This method is meant to be used by the journal, which maintains ordering of +// operations. +func (al *accessList) DeleteSlot(address common.Address, slot common.Hash) { + idx, addrOk := al.addresses[address] + // There are two ways this can fail + if !addrOk { + panic("reverting slot change, address not present in list") + } + slotmap := al.slots[idx] + delete(slotmap, slot) + // If that was the last (first) slot, remove it + // Since additions and rollbacks are always performed in order, + // we can delete the item without worrying about screwing up later indices + if len(slotmap) == 0 { + al.slots = al.slots[:idx] + al.addresses[address] = -1 + } +} + +// DeleteAddress removes an address from the access list. This operation +// needs to be performed in the same order as the addition happened. +// This method is meant to be used by the journal, which maintains ordering of +// operations. +func (al *accessList) DeleteAddress(address common.Address) { + delete(al.addresses, address) +} diff --git a/journal.go b/journal.go new file mode 100644 index 0000000..1206865 --- /dev/null +++ b/journal.go @@ -0,0 +1,253 @@ +package ipld_eth_statedb + +import ( + "math/big" + + "github.com/ethereum/go-ethereum/common" +) + +// journalEntry is a modification entry in the state change journal that can be +// reverted on demand. +type journalEntry interface { + // revert undoes the changes introduced by this journal entry. + revert(*StateDB) + + // dirtied returns the Ethereum address modified by this journal entry. + dirtied() *common.Address +} + +// journal contains the list of state modifications applied since the last state +// commit. These are tracked to be able to be reverted in the case of an execution +// exception or request for reversal. +type journal struct { + entries []journalEntry // Current changes tracked by the journal + dirties map[common.Address]int // Dirty accounts and the number of changes +} + +// newJournal creates a new initialized journal. +func newJournal() *journal { + return &journal{ + dirties: make(map[common.Address]int), + } +} + +// append inserts a new modification entry to the end of the change journal. +func (j *journal) append(entry journalEntry) { + j.entries = append(j.entries, entry) + if addr := entry.dirtied(); addr != nil { + j.dirties[*addr]++ + } +} + +// revert undoes a batch of journalled modifications along with any reverted +// dirty handling too. +func (j *journal) revert(statedb *StateDB, snapshot int) { + for i := len(j.entries) - 1; i >= snapshot; i-- { + // Undo the changes made by the operation + j.entries[i].revert(statedb) + + // Drop any dirty tracking induced by the change + if addr := j.entries[i].dirtied(); addr != nil { + if j.dirties[*addr]--; j.dirties[*addr] == 0 { + delete(j.dirties, *addr) + } + } + } + j.entries = j.entries[:snapshot] +} + +// dirty explicitly sets an address to dirty, even if the change entries would +// otherwise suggest it as clean. This method is an ugly hack to handle the RIPEMD +// precompile consensus exception. +func (j *journal) dirty(addr common.Address) { + j.dirties[addr]++ +} + +// length returns the current number of entries in the journal. +func (j *journal) length() int { + return len(j.entries) +} + +type ( + // Changes to the account trie. + createObjectChange struct { + account *common.Address + } + resetObjectChange struct { + prev *stateObject + prevdestruct bool + } + suicideChange struct { + account *common.Address + prev bool // whether account had already suicided + prevbalance *big.Int + } + + // Changes to individual accounts. + balanceChange struct { + account *common.Address + prev *big.Int + } + nonceChange struct { + account *common.Address + prev uint64 + } + storageChange struct { + account *common.Address + key, prevalue common.Hash + } + codeChange struct { + account *common.Address + prevcode, prevhash []byte + } + + // Changes to other state values. + refundChange struct { + prev uint64 + } + addLogChange struct { + txhash common.Hash + } + addPreimageChange struct { + hash common.Hash + } + touchChange struct { + account *common.Address + } + // Changes to the access list + accessListAddAccountChange struct { + address *common.Address + } + accessListAddSlotChange struct { + address *common.Address + slot *common.Hash + } +) + +func (ch createObjectChange) revert(s *StateDB) { + delete(s.stateObjects, *ch.account) + delete(s.stateObjectsDirty, *ch.account) +} + +func (ch createObjectChange) dirtied() *common.Address { + return ch.account +} + +func (ch resetObjectChange) revert(s *StateDB) { + s.setStateObject(ch.prev) + if !ch.prevdestruct && s.snap != nil { + delete(s.snapDestructs, ch.prev.addrHash) + } +} + +func (ch resetObjectChange) dirtied() *common.Address { + return nil +} + +func (ch suicideChange) revert(s *StateDB) { + obj := s.getStateObject(*ch.account) + if obj != nil { + obj.suicided = ch.prev + obj.setBalance(ch.prevbalance) + } +} + +func (ch suicideChange) dirtied() *common.Address { + return ch.account +} + +var ripemd = common.HexToAddress("0000000000000000000000000000000000000003") + +func (ch touchChange) revert(s *StateDB) { +} + +func (ch touchChange) dirtied() *common.Address { + return ch.account +} + +func (ch balanceChange) revert(s *StateDB) { + s.getStateObject(*ch.account).setBalance(ch.prev) +} + +func (ch balanceChange) dirtied() *common.Address { + return ch.account +} + +func (ch nonceChange) revert(s *StateDB) { + s.getStateObject(*ch.account).setNonce(ch.prev) +} + +func (ch nonceChange) dirtied() *common.Address { + return ch.account +} + +func (ch codeChange) revert(s *StateDB) { + s.getStateObject(*ch.account).setCode(common.BytesToHash(ch.prevhash), ch.prevcode) +} + +func (ch codeChange) dirtied() *common.Address { + return ch.account +} + +func (ch storageChange) revert(s *StateDB) { + s.getStateObject(*ch.account).setState(ch.key, ch.prevalue) +} + +func (ch storageChange) dirtied() *common.Address { + return ch.account +} + +func (ch refundChange) revert(s *StateDB) { + s.refund = ch.prev +} + +func (ch refundChange) dirtied() *common.Address { + return nil +} + +func (ch addLogChange) revert(s *StateDB) { + logs := s.logs[ch.txhash] + if len(logs) == 1 { + delete(s.logs, ch.txhash) + } else { + s.logs[ch.txhash] = logs[:len(logs)-1] + } + s.logSize-- +} + +func (ch addLogChange) dirtied() *common.Address { + return nil +} + +func (ch addPreimageChange) revert(s *StateDB) { + delete(s.preimages, ch.hash) +} + +func (ch addPreimageChange) dirtied() *common.Address { + return nil +} + +func (ch accessListAddAccountChange) revert(s *StateDB) { + /* + One important invariant here, is that whenever a (addr, slot) is added, if the + addr is not already present, the add causes two journal entries: + - one for the address, + - one for the (address,slot) + Therefore, when unrolling the change, we can always blindly delete the + (addr) at this point, since no storage adds can remain when come upon + a single (addr) change. + */ + s.accessList.DeleteAddress(*ch.address) +} + +func (ch accessListAddAccountChange) dirtied() *common.Address { + return nil +} + +func (ch accessListAddSlotChange) revert(s *StateDB) { + s.accessList.DeleteSlot(*ch.address, *ch.slot) +} + +func (ch accessListAddSlotChange) dirtied() *common.Address { + return nil +} diff --git a/state_object.go b/state_object.go new file mode 100644 index 0000000..f42f88c --- /dev/null +++ b/state_object.go @@ -0,0 +1,521 @@ +package ipld_eth_statedb + +import ( + "bytes" + "fmt" + "io" + "math/big" + "time" + + "github.com/ethereum/go-ethereum/core/state" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +var emptyCodeHash = crypto.Keccak256(nil) + +type Code []byte + +func (c Code) String() string { + return string(c) //strings.Join(Disassemble(c), " ") +} + +type Storage map[common.Hash]common.Hash + +func (s Storage) String() (str string) { + for key, value := range s { + str += fmt.Sprintf("%X : %X\n", key, value) + } + + return +} + +func (s Storage) Copy() Storage { + cpy := make(Storage, len(s)) + for key, value := range s { + cpy[key] = value + } + + return cpy +} + +// stateObject represents an Ethereum account which is being modified. +// +// The usage pattern is as follows: +// First you need to obtain a state object. +// Account values can be accessed and modified through the object. +// Finally, call CommitTrie to write the modified storage trie into a database. +type stateObject struct { + address common.Address + addrHash common.Hash // hash of ethereum address of the account + data types.StateAccount + db *StateDB + + // DB error. + // State objects are used by the consensus core and VM which are + // unable to deal with database-level errors. Any error that occurs + // during a database read is memoized here and will eventually be returned + // by StateDB.Commit. + dbErr error + + // Write caches. + trie state.Trie // storage trie, which becomes non-nil on first access + code Code // contract bytecode, which gets set when code is loaded + + originStorage Storage // Storage cache of original entries to dedup rewrites, reset for every transaction + pendingStorage Storage // Storage entries that need to be flushed to disk, at the end of an entire block + dirtyStorage Storage // Storage entries that have been modified in the current transaction execution + fakeStorage Storage // Fake storage which constructed by caller for debugging purpose. + + // Cache flags. + // When an object is marked suicided it will be delete from the trie + // during the "update" phase of the state transition. + dirtyCode bool // true if the code was updated + suicided bool + deleted bool +} + +// empty returns whether the account is considered empty. +func (s *stateObject) empty() bool { + return s.data.Nonce == 0 && s.data.Balance.Sign() == 0 && bytes.Equal(s.data.CodeHash, emptyCodeHash) +} + +// newObject creates a state object. +func newObject(db *StateDB, address common.Address, data types.StateAccount) *stateObject { + if data.Balance == nil { + data.Balance = new(big.Int) + } + if data.CodeHash == nil { + data.CodeHash = emptyCodeHash + } + if data.Root == (common.Hash{}) { + data.Root = emptyRoot + } + return &stateObject{ + db: db, + address: address, + addrHash: crypto.Keccak256Hash(address[:]), + data: data, + originStorage: make(Storage), + pendingStorage: make(Storage), + dirtyStorage: make(Storage), + } +} + +// EncodeRLP implements rlp.Encoder. +func (s *stateObject) EncodeRLP(w io.Writer) error { + return rlp.Encode(w, &s.data) +} + +// setError remembers the first non-nil error it is called with. +func (s *stateObject) setError(err error) { + if s.dbErr == nil { + s.dbErr = err + } +} + +func (s *stateObject) markSuicided() { + s.suicided = true +} + +func (s *stateObject) touch() { + s.db.journal.append(touchChange{ + account: &s.address, + }) + if s.address == ripemd { + // Explicitly put it in the dirty-cache, which is otherwise generated from + // flattened journals. + s.db.journal.dirty(s.address) + } +} + +func (s *stateObject) getTrie(db state.Database) state.Trie { + if s.trie == nil { + // Try fetching from prefetcher first + // We don't prefetch empty tries + if s.data.Root != emptyRoot && s.db.prefetcher != nil { + // When the miner is creating the pending state, there is no + // prefetcher + s.trie = s.db.prefetcher.trie(s.addrHash, s.data.Root) + } + if s.trie == nil { + var err error + s.trie, err = db.OpenStorageTrie(s.addrHash, s.data.Root) + if err != nil { + s.trie, _ = db.OpenStorageTrie(s.addrHash, common.Hash{}) + s.setError(fmt.Errorf("can't create storage trie: %v", err)) + } + } + } + return s.trie +} + +// GetState retrieves a value from the account storage trie. +func (s *stateObject) GetState(db state.Database, key common.Hash) common.Hash { + // If the fake storage is set, only lookup the state here(in the debugging mode) + if s.fakeStorage != nil { + return s.fakeStorage[key] + } + // If we have a dirty value for this state entry, return it + value, dirty := s.dirtyStorage[key] + if dirty { + return value + } + // Otherwise return the entry's original value + return s.GetCommittedState(db, key) +} + +// GetCommittedState retrieves a value from the committed account storage trie. +func (s *stateObject) GetCommittedState(db state.Database, key common.Hash) common.Hash { + // If the fake storage is set, only lookup the state here(in the debugging mode) + if s.fakeStorage != nil { + return s.fakeStorage[key] + } + // If we have a pending write or clean cached, return that + if value, pending := s.pendingStorage[key]; pending { + return value + } + if value, cached := s.originStorage[key]; cached { + return value + } + // If no live objects are available, attempt to use snapshots + var ( + enc []byte + err error + ) + if s.db.snap != nil { + // If the object was destructed in *this* block (and potentially resurrected), + // the storage has been cleared out, and we should *not* consult the previous + // snapshot about any storage values. The only possible alternatives are: + // 1) resurrect happened, and new slot values were set -- those should + // have been handles via pendingStorage above. + // 2) we don't have new values, and can deliver empty response back + if _, destructed := s.db.snapDestructs[s.addrHash]; destructed { + return common.Hash{} + } + start := time.Now() + enc, err = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key.Bytes())) + if metrics.EnabledExpensive { + s.db.SnapshotStorageReads += time.Since(start) + } + } + // If the snapshot is unavailable or reading from it fails, load from the database. + if s.db.snap == nil || err != nil { + start := time.Now() + enc, err = s.getTrie(db).TryGet(key.Bytes()) + if metrics.EnabledExpensive { + s.db.StorageReads += time.Since(start) + } + if err != nil { + s.setError(err) + return common.Hash{} + } + } + var value common.Hash + if len(enc) > 0 { + _, content, _, err := rlp.Split(enc) + if err != nil { + s.setError(err) + } + value.SetBytes(content) + } + s.originStorage[key] = value + return value +} + +// SetState updates a value in account storage. +func (s *stateObject) SetState(db state.Database, key, value common.Hash) { + // If the fake storage is set, put the temporary state update here. + if s.fakeStorage != nil { + s.fakeStorage[key] = value + return + } + // If the new value is the same as old, don't set + prev := s.GetState(db, key) + if prev == value { + return + } + // New value is different, update and journal the change + s.db.journal.append(storageChange{ + account: &s.address, + key: key, + prevalue: prev, + }) + s.setState(key, value) +} + +// SetStorage replaces the entire state storage with the given one. +// +// After this function is called, all original state will be ignored and state +// lookup only happens in the fake state storage. +// +// Note this function should only be used for debugging purpose. +func (s *stateObject) SetStorage(storage map[common.Hash]common.Hash) { + // Allocate fake storage if it's nil. + if s.fakeStorage == nil { + s.fakeStorage = make(Storage) + } + for key, value := range storage { + s.fakeStorage[key] = value + } + // Don't bother journal since this function should only be used for + // debugging and the `fake` storage won't be committed to database. +} + +func (s *stateObject) setState(key, value common.Hash) { + s.dirtyStorage[key] = value +} + +// finalise moves all dirty storage slots into the pending area to be hashed or +// committed later. It is invoked at the end of every transaction. +func (s *stateObject) finalise(prefetch bool) { + slotsToPrefetch := make([][]byte, 0, len(s.dirtyStorage)) + for key, value := range s.dirtyStorage { + s.pendingStorage[key] = value + if value != s.originStorage[key] { + slotsToPrefetch = append(slotsToPrefetch, common.CopyBytes(key[:])) // Copy needed for closure + } + } + if s.db.prefetcher != nil && prefetch && len(slotsToPrefetch) > 0 && s.data.Root != emptyRoot { + s.db.prefetcher.prefetch(s.addrHash, s.data.Root, slotsToPrefetch) + } + if len(s.dirtyStorage) > 0 { + s.dirtyStorage = make(Storage) + } +} + +// updateTrie writes cached storage modifications into the object's storage trie. +// It will return nil if the trie has not been loaded and no changes have been made +func (s *stateObject) updateTrie(db state.Database) state.Trie { + // Make sure all dirty slots are finalized into the pending storage area + s.finalise(false) // Don't prefetch anymore, pull directly if need be + if len(s.pendingStorage) == 0 { + return s.trie + } + // Track the amount of time wasted on updating the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) + } + // The snapshot storage map for the object + var storage map[common.Hash][]byte + // Insert all the pending updates into the trie + tr := s.getTrie(db) + hasher := s.db.hasher + + usedStorage := make([][]byte, 0, len(s.pendingStorage)) + for key, value := range s.pendingStorage { + // Skip noop changes, persist actual changes + if value == s.originStorage[key] { + continue + } + s.originStorage[key] = value + + var v []byte + if (value == common.Hash{}) { + s.setError(tr.TryDelete(key[:])) + s.db.StorageDeleted += 1 + } else { + // Encoding []byte cannot fail, ok to ignore the error. + v, _ = rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) + s.setError(tr.TryUpdate(key[:], v)) + s.db.StorageUpdated += 1 + } + // If state snapshotting is active, cache the data til commit + if s.db.snap != nil { + if storage == nil { + // Retrieve the old storage map, if available, create a new one otherwise + if storage = s.db.snapStorage[s.addrHash]; storage == nil { + storage = make(map[common.Hash][]byte) + s.db.snapStorage[s.addrHash] = storage + } + } + storage[crypto.HashData(hasher, key[:])] = v // v will be nil if it's deleted + } + usedStorage = append(usedStorage, common.CopyBytes(key[:])) // Copy needed for closure + } + if s.db.prefetcher != nil { + s.db.prefetcher.used(s.addrHash, s.data.Root, usedStorage) + } + if len(s.pendingStorage) > 0 { + s.pendingStorage = make(Storage) + } + return tr +} + +// UpdateRoot sets the trie root to the current root hash of +func (s *stateObject) updateRoot(db state.Database) { + // If nothing changed, don't bother with hashing anything + if s.updateTrie(db) == nil { + return + } + // Track the amount of time wasted on hashing the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now()) + } + s.data.Root = s.trie.Hash() +} + +// CommitTrie the storage trie of the object to db. +// This updates the trie root. +func (s *stateObject) CommitTrie(db state.Database) (*trie.NodeSet, error) { + // If nothing changed, don't bother with hashing anything + if s.updateTrie(db) == nil { + return nil, nil + } + if s.dbErr != nil { + return nil, s.dbErr + } + // Track the amount of time wasted on committing the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) + } + root, nodes, err := s.trie.Commit(false) + if err == nil { + s.data.Root = root + } + return nodes, err +} + +// AddBalance adds amount to s's balance. +// It is used to add funds to the destination account of a transfer. +func (s *stateObject) AddBalance(amount *big.Int) { + // EIP161: We must check emptiness for the objects such that the account + // clearing (0,0,0 objects) can take effect. + if amount.Sign() == 0 { + if s.empty() { + s.touch() + } + return + } + s.SetBalance(new(big.Int).Add(s.Balance(), amount)) +} + +// SubBalance removes amount from s's balance. +// It is used to remove funds from the origin account of a transfer. +func (s *stateObject) SubBalance(amount *big.Int) { + if amount.Sign() == 0 { + return + } + s.SetBalance(new(big.Int).Sub(s.Balance(), amount)) +} + +func (s *stateObject) SetBalance(amount *big.Int) { + s.db.journal.append(balanceChange{ + account: &s.address, + prev: new(big.Int).Set(s.data.Balance), + }) + s.setBalance(amount) +} + +func (s *stateObject) setBalance(amount *big.Int) { + s.data.Balance = amount +} + +func (s *stateObject) deepCopy(db *StateDB) *stateObject { + stateObject := newObject(db, s.address, s.data) + if s.trie != nil { + stateObject.trie = db.db.CopyTrie(s.trie) + } + stateObject.code = s.code + stateObject.dirtyStorage = s.dirtyStorage.Copy() + stateObject.originStorage = s.originStorage.Copy() + stateObject.pendingStorage = s.pendingStorage.Copy() + stateObject.suicided = s.suicided + stateObject.dirtyCode = s.dirtyCode + stateObject.deleted = s.deleted + return stateObject +} + +// +// Attribute accessors +// + +// Returns the address of the contract/account +func (s *stateObject) Address() common.Address { + return s.address +} + +// Code returns the contract code associated with this object, if any. +func (s *stateObject) Code(db state.Database) []byte { + if s.code != nil { + return s.code + } + if bytes.Equal(s.CodeHash(), emptyCodeHash) { + return nil + } + code, err := db.ContractCode(s.addrHash, common.BytesToHash(s.CodeHash())) + if err != nil { + s.setError(fmt.Errorf("can't load code hash %x: %v", s.CodeHash(), err)) + } + s.code = code + return code +} + +// CodeSize returns the size of the contract code associated with this object, +// or zero if none. This method is an almost mirror of Code, but uses a cache +// inside the database to avoid loading codes seen recently. +func (s *stateObject) CodeSize(db state.Database) int { + if s.code != nil { + return len(s.code) + } + if bytes.Equal(s.CodeHash(), emptyCodeHash) { + return 0 + } + size, err := db.ContractCodeSize(s.addrHash, common.BytesToHash(s.CodeHash())) + if err != nil { + s.setError(fmt.Errorf("can't load code size %x: %v", s.CodeHash(), err)) + } + return size +} + +func (s *stateObject) SetCode(codeHash common.Hash, code []byte) { + prevcode := s.Code(s.db.db) + s.db.journal.append(codeChange{ + account: &s.address, + prevhash: s.CodeHash(), + prevcode: prevcode, + }) + s.setCode(codeHash, code) +} + +func (s *stateObject) setCode(codeHash common.Hash, code []byte) { + s.code = code + s.data.CodeHash = codeHash[:] + s.dirtyCode = true +} + +func (s *stateObject) SetNonce(nonce uint64) { + s.db.journal.append(nonceChange{ + account: &s.address, + prev: s.data.Nonce, + }) + s.setNonce(nonce) +} + +func (s *stateObject) setNonce(nonce uint64) { + s.data.Nonce = nonce +} + +func (s *stateObject) CodeHash() []byte { + return s.data.CodeHash +} + +func (s *stateObject) Balance() *big.Int { + return s.data.Balance +} + +func (s *stateObject) Nonce() uint64 { + return s.data.Nonce +} + +// Never called, but must be present to allow stateObject to be used +// as a vm.Account interface that also satisfies the vm.ContractRef +// interface. Interfaces are awesome. +func (s *stateObject) Value() *big.Int { + panic("Value on stateObject should never be called") +} diff --git a/trie_prefetcher.go b/trie_prefetcher.go new file mode 100644 index 0000000..52700ae --- /dev/null +++ b/trie_prefetcher.go @@ -0,0 +1,339 @@ +package ipld_eth_statedb + +import ( + "sync" + + "github.com/ethereum/go-ethereum/core/state" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" +) + +var ( + // triePrefetchMetricsPrefix is the prefix under which to publish the metrics. + triePrefetchMetricsPrefix = "trie/prefetch/" +) + +// triePrefetcher is an active prefetcher, which receives accounts or storage +// items and does trie-loading of them. The goal is to get as much useful content +// into the caches as possible. +// +// Note, the prefetcher's API is not thread safe. +type triePrefetcher struct { + db state.Database // Database to fetch trie nodes through + root common.Hash // Root hash of the account trie for metrics + fetches map[string]state.Trie // Partially or fully fetcher tries + fetchers map[string]*subfetcher // Subfetchers for each trie + + deliveryMissMeter metrics.Meter + accountLoadMeter metrics.Meter + accountDupMeter metrics.Meter + accountSkipMeter metrics.Meter + accountWasteMeter metrics.Meter + storageLoadMeter metrics.Meter + storageDupMeter metrics.Meter + storageSkipMeter metrics.Meter + storageWasteMeter metrics.Meter +} + +func newTriePrefetcher(db state.Database, root common.Hash, namespace string) *triePrefetcher { + prefix := triePrefetchMetricsPrefix + namespace + p := &triePrefetcher{ + db: db, + root: root, + fetchers: make(map[string]*subfetcher), // Active prefetchers use the fetchers map + + deliveryMissMeter: metrics.GetOrRegisterMeter(prefix+"/deliverymiss", nil), + accountLoadMeter: metrics.GetOrRegisterMeter(prefix+"/account/load", nil), + accountDupMeter: metrics.GetOrRegisterMeter(prefix+"/account/dup", nil), + accountSkipMeter: metrics.GetOrRegisterMeter(prefix+"/account/skip", nil), + accountWasteMeter: metrics.GetOrRegisterMeter(prefix+"/account/waste", nil), + storageLoadMeter: metrics.GetOrRegisterMeter(prefix+"/storage/load", nil), + storageDupMeter: metrics.GetOrRegisterMeter(prefix+"/storage/dup", nil), + storageSkipMeter: metrics.GetOrRegisterMeter(prefix+"/storage/skip", nil), + storageWasteMeter: metrics.GetOrRegisterMeter(prefix+"/storage/waste", nil), + } + return p +} + +// close iterates over all the subfetchers, aborts any that were left spinning +// and reports the stats to the metrics subsystem. +func (p *triePrefetcher) close() { + for _, fetcher := range p.fetchers { + fetcher.abort() // safe to do multiple times + + if metrics.Enabled { + if fetcher.root == p.root { + p.accountLoadMeter.Mark(int64(len(fetcher.seen))) + p.accountDupMeter.Mark(int64(fetcher.dups)) + p.accountSkipMeter.Mark(int64(len(fetcher.tasks))) + + for _, key := range fetcher.used { + delete(fetcher.seen, string(key)) + } + p.accountWasteMeter.Mark(int64(len(fetcher.seen))) + } else { + p.storageLoadMeter.Mark(int64(len(fetcher.seen))) + p.storageDupMeter.Mark(int64(fetcher.dups)) + p.storageSkipMeter.Mark(int64(len(fetcher.tasks))) + + for _, key := range fetcher.used { + delete(fetcher.seen, string(key)) + } + p.storageWasteMeter.Mark(int64(len(fetcher.seen))) + } + } + } + // Clear out all fetchers (will crash on a second call, deliberate) + p.fetchers = nil +} + +// copy creates a deep-but-inactive copy of the trie prefetcher. Any trie data +// already loaded will be copied over, but no goroutines will be started. This +// is mostly used in the miner which creates a copy of it's actively mutated +// state to be sealed while it may further mutate the state. +func (p *triePrefetcher) copy() *triePrefetcher { + copy := &triePrefetcher{ + db: p.db, + root: p.root, + fetches: make(map[string]state.Trie), // Active prefetchers use the fetches map + + deliveryMissMeter: p.deliveryMissMeter, + accountLoadMeter: p.accountLoadMeter, + accountDupMeter: p.accountDupMeter, + accountSkipMeter: p.accountSkipMeter, + accountWasteMeter: p.accountWasteMeter, + storageLoadMeter: p.storageLoadMeter, + storageDupMeter: p.storageDupMeter, + storageSkipMeter: p.storageSkipMeter, + storageWasteMeter: p.storageWasteMeter, + } + // If the prefetcher is already a copy, duplicate the data + if p.fetches != nil { + for root, fetch := range p.fetches { + copy.fetches[root] = p.db.CopyTrie(fetch) + } + return copy + } + // Otherwise we're copying an active fetcher, retrieve the current states + for id, fetcher := range p.fetchers { + copy.fetches[id] = fetcher.peek() + } + return copy +} + +// prefetch schedules a batch of trie items to prefetch. +func (p *triePrefetcher) prefetch(owner common.Hash, root common.Hash, keys [][]byte) { + // If the prefetcher is an inactive one, bail out + if p.fetches != nil { + return + } + // Active fetcher, schedule the retrievals + id := p.trieID(owner, root) + fetcher := p.fetchers[id] + if fetcher == nil { + fetcher = newSubfetcher(p.db, owner, root) + p.fetchers[id] = fetcher + } + fetcher.schedule(keys) +} + +// trie returns the trie matching the root hash, or nil if the prefetcher doesn't +// have it. +func (p *triePrefetcher) trie(owner common.Hash, root common.Hash) state.Trie { + // If the prefetcher is inactive, return from existing deep copies + id := p.trieID(owner, root) + if p.fetches != nil { + trie := p.fetches[id] + if trie == nil { + p.deliveryMissMeter.Mark(1) + return nil + } + return p.db.CopyTrie(trie) + } + // Otherwise the prefetcher is active, bail if no trie was prefetched for this root + fetcher := p.fetchers[id] + if fetcher == nil { + p.deliveryMissMeter.Mark(1) + return nil + } + // Interrupt the prefetcher if it's by any chance still running and return + // a copy of any pre-loaded trie. + fetcher.abort() // safe to do multiple times + + trie := fetcher.peek() + if trie == nil { + p.deliveryMissMeter.Mark(1) + return nil + } + return trie +} + +// used marks a batch of state items used to allow creating statistics as to +// how useful or wasteful the prefetcher is. +func (p *triePrefetcher) used(owner common.Hash, root common.Hash, used [][]byte) { + if fetcher := p.fetchers[p.trieID(owner, root)]; fetcher != nil { + fetcher.used = used + } +} + +// trieID returns an unique trie identifier consists the trie owner and root hash. +func (p *triePrefetcher) trieID(owner common.Hash, root common.Hash) string { + return string(append(owner.Bytes(), root.Bytes()...)) +} + +// subfetcher is a trie fetcher goroutine responsible for pulling entries for a +// single trie. It is spawned when a new root is encountered and lives until the +// main prefetcher is paused and either all requested items are processed or if +// the trie being worked on is retrieved from the prefetcher. +type subfetcher struct { + db state.Database // Database to load trie nodes through + owner common.Hash // Owner of the trie, usually account hash + root common.Hash // Root hash of the trie to prefetch + trie state.Trie // Trie being populated with nodes + + tasks [][]byte // Items queued up for retrieval + lock sync.Mutex // Lock protecting the task queue + + wake chan struct{} // Wake channel if a new task is scheduled + stop chan struct{} // Channel to interrupt processing + term chan struct{} // Channel to signal interruption + copy chan chan state.Trie // Channel to request a copy of the current trie + + seen map[string]struct{} // Tracks the entries already loaded + dups int // Number of duplicate preload tasks + used [][]byte // Tracks the entries used in the end +} + +// newSubfetcher creates a goroutine to prefetch state items belonging to a +// particular root hash. +func newSubfetcher(db state.Database, owner common.Hash, root common.Hash) *subfetcher { + sf := &subfetcher{ + db: db, + owner: owner, + root: root, + wake: make(chan struct{}, 1), + stop: make(chan struct{}), + term: make(chan struct{}), + copy: make(chan chan state.Trie), + seen: make(map[string]struct{}), + } + go sf.loop() + return sf +} + +// schedule adds a batch of trie keys to the queue to prefetch. +func (sf *subfetcher) schedule(keys [][]byte) { + // Append the tasks to the current queue + sf.lock.Lock() + sf.tasks = append(sf.tasks, keys...) + sf.lock.Unlock() + + // Notify the prefetcher, it's fine if it's already terminated + select { + case sf.wake <- struct{}{}: + default: + } +} + +// peek tries to retrieve a deep copy of the fetcher's trie in whatever form it +// is currently. +func (sf *subfetcher) peek() state.Trie { + ch := make(chan state.Trie) + select { + case sf.copy <- ch: + // Subfetcher still alive, return copy from it + return <-ch + + case <-sf.term: + // Subfetcher already terminated, return a copy directly + if sf.trie == nil { + return nil + } + return sf.db.CopyTrie(sf.trie) + } +} + +// abort interrupts the subfetcher immediately. It is safe to call abort multiple +// times but it is not thread safe. +func (sf *subfetcher) abort() { + select { + case <-sf.stop: + default: + close(sf.stop) + } + <-sf.term +} + +// loop waits for new tasks to be scheduled and keeps loading them until it runs +// out of tasks or its underlying trie is retrieved for committing. +func (sf *subfetcher) loop() { + // No matter how the loop stops, signal anyone waiting that it's terminated + defer close(sf.term) + + // Start by opening the trie and stop processing if it fails + if sf.owner == (common.Hash{}) { + trie, err := sf.db.OpenTrie(sf.root) + if err != nil { + log.Warn("Trie prefetcher failed opening trie", "root", sf.root, "err", err) + return + } + sf.trie = trie + } else { + trie, err := sf.db.OpenStorageTrie(sf.owner, sf.root) + if err != nil { + log.Warn("Trie prefetcher failed opening trie", "root", sf.root, "err", err) + return + } + sf.trie = trie + } + // Trie opened successfully, keep prefetching items + for { + select { + case <-sf.wake: + // Subfetcher was woken up, retrieve any tasks to avoid spinning the lock + sf.lock.Lock() + tasks := sf.tasks + sf.tasks = nil + sf.lock.Unlock() + + // Prefetch any tasks until the loop is interrupted + for i, task := range tasks { + select { + case <-sf.stop: + // If termination is requested, add any leftover back and return + sf.lock.Lock() + sf.tasks = append(sf.tasks, tasks[i:]...) + sf.lock.Unlock() + return + + case ch := <-sf.copy: + // Somebody wants a copy of the current trie, grant them + ch <- sf.db.CopyTrie(sf.trie) + + default: + // No termination request yet, prefetch the next entry + if _, ok := sf.seen[string(task)]; ok { + sf.dups++ + } else { + if len(task) == len(common.Address{}) { + sf.trie.TryGetAccount(task) + } else { + sf.trie.TryGet(task) + } + sf.seen[string(task)] = struct{}{} + } + } + } + + case ch := <-sf.copy: + // Somebody wants a copy of the current trie, grant them + ch <- sf.db.CopyTrie(sf.trie) + + case <-sf.stop: + // Termination is requested, abort and leave remaining tasks + return + } + } +}