From c09d19b68baa92eeda7d5f08a3f58052ae4cff5a Mon Sep 17 00:00:00 2001 From: i-norden Date: Wed, 1 Nov 2023 14:20:25 -0500 Subject: [PATCH 1/2] additional StateDB and stateObject methods required for tracing --- direct_by_leaf/state_object.go | 29 +++++- direct_by_leaf/statedb.go | 166 ++++++++++++++++++++++++++++++--- 2 files changed, 179 insertions(+), 16 deletions(-) diff --git a/direct_by_leaf/state_object.go b/direct_by_leaf/state_object.go index d388589..7bf22cf 100644 --- a/direct_by_leaf/state_object.go +++ b/direct_by_leaf/state_object.go @@ -340,9 +340,36 @@ func (s *stateObject) Nonce() uint64 { return s.data.Nonce } -// Never called, but must be present to allow stateObject to be used +// Value is never called, but must be present to allow stateObject to be used // as a vm.Account interface that also satisfies the vm.ContractRef // interface. Interfaces are awesome. func (s *stateObject) Value() *big.Int { panic("Value on stateObject should never be called") } + +// finalise moves all dirty storage slots into the pending area to be hashed or +// committed later. It is invoked at the end of every transaction. +func (s *stateObject) finalise(prefetch bool) { + slotsToPrefetch := make([][]byte, 0, len(s.dirtyStorage)) + for key, value := range s.dirtyStorage { + s.pendingStorage[key] = value + if value != s.originStorage[key] { + slotsToPrefetch = append(slotsToPrefetch, common.CopyBytes(key[:])) // Copy needed for closure + } + } + if len(s.dirtyStorage) > 0 { + s.dirtyStorage = make(Storage) + } +} + +func (s *stateObject) deepCopy(db *StateDB) *stateObject { + stateObject := newObject(db, s.address, s.data, s.blockHash) + stateObject.code = s.code + stateObject.dirtyStorage = s.dirtyStorage.Copy() + stateObject.originStorage = s.originStorage.Copy() + stateObject.pendingStorage = s.pendingStorage.Copy() + stateObject.suicided = s.suicided + stateObject.dirtyCode = s.dirtyCode + stateObject.deleted = s.deleted + return stateObject +} diff --git a/direct_by_leaf/statedb.go b/direct_by_leaf/statedb.go index ff2c0fd..e0d0229 100644 --- a/direct_by_leaf/statedb.go +++ b/direct_by_leaf/statedb.go @@ -51,9 +51,10 @@ type StateDB struct { originBlockHash common.Hash // This map holds 'live' objects, which will get modified while processing a state transition. - stateObjects map[common.Address]*stateObject - stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie - stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution + stateObjects map[common.Address]*stateObject + stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie + stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution + stateObjectsDestruct map[common.Address]struct{} // State objects destructed in the block // DB error. // State objects are used by the consensus core and VM which are @@ -92,17 +93,18 @@ type StateDB struct { // New creates a new StateDB on the state for the provided blockHash func New(blockHash common.Hash, db StateDatabase) (*StateDB, error) { sdb := &StateDB{ - db: db, - originBlockHash: blockHash, - stateObjects: make(map[common.Address]*stateObject), - stateObjectsPending: make(map[common.Address]struct{}), - stateObjectsDirty: make(map[common.Address]struct{}), - logs: make(map[common.Hash][]*types.Log), - preimages: make(map[common.Hash][]byte), - journal: newJournal(), - accessList: newAccessList(), - transientStorage: newTransientStorage(), - hasher: crypto.NewKeccakState(), + db: db, + originBlockHash: blockHash, + stateObjects: make(map[common.Address]*stateObject), + stateObjectsPending: make(map[common.Address]struct{}), + stateObjectsDirty: make(map[common.Address]struct{}), + stateObjectsDestruct: make(map[common.Address]struct{}), + logs: make(map[common.Hash][]*types.Log), + preimages: make(map[common.Hash][]byte), + journal: newJournal(), + accessList: newAccessList(), + transientStorage: newTransientStorage(), + hasher: crypto.NewKeccakState(), } return sdb, nil } @@ -282,6 +284,7 @@ func (s *StateDB) SetState(addr common.Address, key, value common.Hash) { // SetStorage replaces the entire storage for the specified account with given // storage. This function should only be used for debugging. func (s *StateDB) SetStorage(addr common.Address, storage map[common.Hash]common.Hash) { + s.stateObjectsDestruct[addr] = struct{}{} stateObject := s.getOrNewStateObject(addr) if stateObject != nil { stateObject.SetStorage(storage) @@ -399,11 +402,19 @@ func (s *StateDB) getOrNewStateObject(addr common.Address) *stateObject { // the given address, it is overwritten and returned as the second return value. func (s *StateDB) createObject(addr common.Address) (newobj, prev *stateObject) { prev = s.getDeletedStateObject(addr) // Note, prev might have been deleted, we need that! + + var prevdestruct bool + if prev != nil { + _, prevdestruct = s.stateObjectsDestruct[prev.address] + if !prevdestruct { + s.stateObjectsDestruct[prev.address] = struct{}{} + } + } newobj = newObject(s, addr, types.StateAccount{}, s.originBlockHash) if prev == nil { s.journal.append(createObjectChange{account: &addr}) } else { - s.journal.append(resetObjectChange{prev: prev}) // NOTE: prevdestruct used to be set here from snapshot + s.journal.append(resetObjectChange{prev: prev, prevdestruct: prevdestruct}) // NOTE: prevdestruct used to be set here from snapshot } s.setStateObject(newobj) if prev != nil && !prev.deleted { @@ -541,3 +552,128 @@ func (s *StateDB) AddressInAccessList(addr common.Address) bool { func (s *StateDB) SlotInAccessList(addr common.Address, slot common.Hash) (addressPresent bool, slotPresent bool) { return s.accessList.Contains(addr, slot) } + +// Finalise finalises the state by removing the destructed objects and clears +// the journal as well as the refunds. Finalise, however, will not push any updates +// into the tries just yet. Only IntermediateRoot or Commit will do that. +func (s *StateDB) Finalise(deleteEmptyObjects bool) { + for addr := range s.journal.dirties { + obj, exist := s.stateObjects[addr] + if !exist { + // ripeMD is 'touched' at block 1714175, in tx 0x1237f737031e40bcde4a8b7e717b2d15e3ecadfe49bb1bbc71ee9deb09c6fcf2 + // That tx goes out of gas, and although the notion of 'touched' does not exist there, the + // touch-event will still be recorded in the journal. Since ripeMD is a special snowflake, + // it will persist in the journal even though the journal is reverted. In this special circumstance, + // it may exist in `s.journal.dirties` but not in `s.stateObjects`. + // Thus, we can safely ignore it here + continue + } + if obj.suicided || (deleteEmptyObjects && obj.empty()) { + obj.deleted = true + + // We need to maintain account deletions explicitly (will remain + // set indefinitely). + s.stateObjectsDestruct[obj.address] = struct{}{} + } else { + obj.finalise(true) // Prefetch slots in the background + } + s.stateObjectsPending[addr] = struct{}{} + s.stateObjectsDirty[addr] = struct{}{} + } + + // Invalidate journal because reverting across transactions is not allowed. + s.clearJournalAndRefund() +} + +// SetTxContext sets the current transaction hash and index which are +// used when the EVM emits new state logs. It should be invoked before +// transaction execution. +func (s *StateDB) SetTxContext(thash common.Hash, ti int) { + s.thash = thash + s.txIndex = ti +} + +func (s *StateDB) clearJournalAndRefund() { + if len(s.journal.entries) > 0 { + s.journal = newJournal() + s.refund = 0 + } + s.validRevisions = s.validRevisions[:0] // Snapshots can be created without journal entries +} + +// Copy creates a deep, independent copy of the state. +// Snapshots of the copied state cannot be applied to the copy. +func (s *StateDB) Copy() *StateDB { + // Copy all the basic fields, initialize the memory ones + state := &StateDB{ + db: s.db, + originBlockHash: s.originBlockHash, + stateObjects: make(map[common.Address]*stateObject, len(s.journal.dirties)), + stateObjectsPending: make(map[common.Address]struct{}, len(s.stateObjectsPending)), + stateObjectsDirty: make(map[common.Address]struct{}, len(s.journal.dirties)), + stateObjectsDestruct: make(map[common.Address]struct{}, len(s.stateObjectsDestruct)), + refund: s.refund, + logs: make(map[common.Hash][]*types.Log, len(s.logs)), + logSize: s.logSize, + preimages: make(map[common.Hash][]byte, len(s.preimages)), + journal: newJournal(), + hasher: crypto.NewKeccakState(), + } + // Copy the dirty states, logs, and preimages + for addr := range s.journal.dirties { + // As documented [here](https://github.com/ethereum/go-ethereum/pull/16485#issuecomment-380438527), + // and in the Finalise-method, there is a case where an object is in the journal but not + // in the stateObjects: OOG after touch on ripeMD prior to Byzantium. Thus, we need to check for + // nil + if object, exist := s.stateObjects[addr]; exist { + // Even though the original object is dirty, we are not copying the journal, + // so we need to make sure that any side-effect the journal would have caused + // during a commit (or similar op) is already applied to the copy. + state.stateObjects[addr] = object.deepCopy(state) + + state.stateObjectsDirty[addr] = struct{}{} // Mark the copy dirty to force internal (code/state) commits + state.stateObjectsPending[addr] = struct{}{} // Mark the copy pending to force external (account) commits + } + } + // Above, we don't copy the actual journal. This means that if the copy + // is copied, the loop above will be a no-op, since the copy's journal + // is empty. Thus, here we iterate over stateObjects, to enable copies + // of copies. + for addr := range s.stateObjectsPending { + if _, exist := state.stateObjects[addr]; !exist { + state.stateObjects[addr] = s.stateObjects[addr].deepCopy(state) + } + state.stateObjectsPending[addr] = struct{}{} + } + for addr := range s.stateObjectsDirty { + if _, exist := state.stateObjects[addr]; !exist { + state.stateObjects[addr] = s.stateObjects[addr].deepCopy(state) + } + state.stateObjectsDirty[addr] = struct{}{} + } + // Deep copy the destruction flag. + for addr := range s.stateObjectsDestruct { + state.stateObjectsDestruct[addr] = struct{}{} + } + for hash, logs := range s.logs { + cpy := make([]*types.Log, len(logs)) + for i, l := range logs { + cpy[i] = new(types.Log) + *cpy[i] = *l + } + state.logs[hash] = cpy + } + for hash, preimage := range s.preimages { + state.preimages[hash] = preimage + } + // Do we need to copy the access list and transient storage? + // In practice: No. At the start of a transaction, these two lists are empty. + // In practice, we only ever copy state _between_ transactions/blocks, never + // in the middle of a transaction. However, it doesn't cost us much to copy + // empty lists, so we do it anyway to not blow up if we ever decide copy them + // in the middle of a transaction. + state.accessList = s.accessList.Copy() + state.transientStorage = s.transientStorage.Copy() + + return state +} From 4faef8b509b366951bd802755763f9e0500e6841 Mon Sep 17 00:00:00 2001 From: i-norden Date: Wed, 1 Nov 2023 16:08:03 -0500 Subject: [PATCH 2/2] go mod --- direct_by_leaf/state_object.go | 4 ---- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/direct_by_leaf/state_object.go b/direct_by_leaf/state_object.go index 7bf22cf..eb42d8c 100644 --- a/direct_by_leaf/state_object.go +++ b/direct_by_leaf/state_object.go @@ -350,12 +350,8 @@ func (s *stateObject) Value() *big.Int { // finalise moves all dirty storage slots into the pending area to be hashed or // committed later. It is invoked at the end of every transaction. func (s *stateObject) finalise(prefetch bool) { - slotsToPrefetch := make([][]byte, 0, len(s.dirtyStorage)) for key, value := range s.dirtyStorage { s.pendingStorage[key] = value - if value != s.originStorage[key] { - slotsToPrefetch = append(slotsToPrefetch, common.CopyBytes(key[:])) // Copy needed for closure - } } if len(s.dirtyStorage) > 0 { s.dirtyStorage = make(Storage) diff --git a/go.mod b/go.mod index 2990ed4..9c143df 100644 --- a/go.mod +++ b/go.mod @@ -115,7 +115,7 @@ require ( ) replace ( - github.com/cerc-io/eth-iterator-utils => git.vdb.to/cerc-io/eth-iterator-utils v0.1.2 + github.com/cerc-io/eth-iterator-utils => git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-beta github.com/cerc-io/eth-testing => git.vdb.to/cerc-io/eth-testing v0.3.1 github.com/cerc-io/plugeth-statediff => git.vdb.to/cerc-io/plugeth-statediff v0.1.4 github.com/ethereum/go-ethereum => git.vdb.to/cerc-io/plugeth v0.0.0-20230808125822-691dc334fab1 diff --git a/go.sum b/go.sum index 67ec601..7901bfd 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -git.vdb.to/cerc-io/eth-iterator-utils v0.1.2 h1:PdMR5B9wrQSYuYpFhN+9Kc8AEZ0pTt5eKCmu8oCtFcY= -git.vdb.to/cerc-io/eth-iterator-utils v0.1.2/go.mod h1:OvXbdWbZ5viBXC/Ui1EkhsSmGB+AUX+TjGa3UDAfjfg= +git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-beta h1:pv1HCRlD7/1X7i35MWylwGhji0aWI4QujsrJoYOW55U= +git.vdb.to/cerc-io/eth-iterator-utils v0.1.2-beta/go.mod h1:OvXbdWbZ5viBXC/Ui1EkhsSmGB+AUX+TjGa3UDAfjfg= git.vdb.to/cerc-io/eth-testing v0.3.1 h1:sPnlMev6oEgTjsW7GtUkSsjKNG/+X6P9q0izSejLGpM= git.vdb.to/cerc-io/plugeth v0.0.0-20230808125822-691dc334fab1 h1:KLjxHwp9Zp7xhECccmJS00RiL+VwTuUGLU7qeIctg8g= git.vdb.to/cerc-io/plugeth v0.0.0-20230808125822-691dc334fab1/go.mod h1:cYXZu70+6xmDgIgrTD81GPasv16piiAFJnKyAbwVPMU=