diff --git a/core/txpool/txpool.go b/core/txpool/txpool.go
index 0d4e05da4..d03e025a9 100644
--- a/core/txpool/txpool.go
+++ b/core/txpool/txpool.go
@@ -72,6 +72,9 @@ type TxPool struct {
 
     subs event.SubscriptionScope // Subscription scope to unsubscribe all on shutdown
     quit chan chan error         // Quit channel to tear down the head updater
+    term chan struct{}           // Termination channel to detect a closed pool
+
+    sync chan chan error // Testing / simulator channel to block until internal reset is done
 }
 
 // New creates a new transaction pool to gather, sort and filter inbound
@@ -86,6 +89,8 @@ func New(gasTip *big.Int, chain BlockChain, subpools []SubPool) (*TxPool, error)
         subpools:     subpools,
         reservations: make(map[common.Address]SubPool),
         quit:         make(chan chan error),
+        term:         make(chan struct{}),
+        sync:         make(chan chan error),
     }
     for i, subpool := range subpools {
         if err := subpool.Init(gasTip, head, pool.reserver(i, subpool)); err != nil {
@@ -174,6 +179,9 @@ func (p *TxPool) Close() error {
 // outside blockchain events as well as for various reporting and transaction
 // eviction events.
 func (p *TxPool) loop(head *types.Header, chain BlockChain) {
+    // Close the termination marker when the pool stops
+    defer close(p.term)
+
     // Subscribe to chain head events to trigger subpool resets
     var (
         newHeadCh = make(chan core.ChainHeadEvent)
@@ -190,13 +198,23 @@ func (p *TxPool) loop(head *types.Header, chain BlockChain) {
     var (
         resetBusy = make(chan struct{}, 1) // Allow 1 reset to run concurrently
         resetDone = make(chan *types.Header)
+
+        resetForced bool       // Whether a forced reset was requested, only used in simulator mode
+        resetWaiter chan error // Channel waiting on a forced reset, only used in simulator mode
     )
+    // Notify the live reset waiter to not block if the txpool is closed.
+    defer func() {
+        if resetWaiter != nil {
+            resetWaiter <- errors.New("pool already terminated")
+            resetWaiter = nil
+        }
+    }()
     var errc chan error
     for errc == nil {
         // Something interesting might have happened, run a reset if there is
         // one needed but none is running. The resetter will run on its own
         // goroutine to allow chain head events to be consumed contiguously.
-        if newHead != oldHead {
+        if newHead != oldHead || resetForced {
             // Try to inject a busy marker and start a reset if successful
             select {
             case resetBusy <- struct{}{}:
@@ -208,8 +226,17 @@ func (p *TxPool) loop(head *types.Header, chain BlockChain) {
                     resetDone <- newHead
                 }(oldHead, newHead)
 
+                // If the reset operation was explicitly requested, consider it
+                // being fulfilled and drop the request marker. If it was not,
+                // this is a noop.
+                resetForced = false
+
             default:
-                // Reset already running, wait until it finishes
+                // Reset already running, wait until it finishes.
+                //
+                // Note, this will not drop any forced reset request. If a forced
+                // reset was requested, but we were busy, then when the currently
+                // running reset finishes, a new one will be spun up.
             }
         }
         // Wait for the next chain head event or a previous reset finish
@@ -223,8 +250,26 @@ func (p *TxPool) loop(head *types.Header, chain BlockChain) {
             oldHead = head
             <-resetBusy
 
+            // If someone is waiting for a reset to finish, notify them, unless
+            // the forced op is still pending. In that case, wait another round
+            // of resets.
+            if resetWaiter != nil && !resetForced {
+                resetWaiter <- nil
+                resetWaiter = nil
+            }
+
         case errc = <-p.quit:
             // Termination requested, break out on the next loop round
+
+        case syncc := <-p.sync:
+            // Transaction pool is running inside a simulator, and we are about
+            // to create a new block. Request a forced sync operation to ensure
+            // that any running reset operation finishes to make block imports
+            // deterministic. On top of that, run a new reset operation to make
+            // transaction insertions deterministic instead of being stuck in a
+            // queue waiting for a reset.
+            resetForced = true
+            resetWaiter = syncc
         }
     }
     // Notify the closer of termination (no error possible for now)
@@ -415,3 +460,20 @@ func (p *TxPool) Status(hash common.Hash) TxStatus {
     }
     return TxStatusUnknown
 }
+
+// Sync is a helper method for unit tests or simulator runs where the chain events
+// are arriving in quick succession, without any time in between them to run the
+// internal background reset operations. This method will run an explicit reset
+// operation to ensure the pool stabilises, thus avoiding flakey behavior.
+//
+// Note, do not use this in production / live code. In live code, the pool is
+// meant to reset on a separate thread to avoid DoS vectors.
+func (p *TxPool) Sync() error {
+    sync := make(chan error)
+    select {
+    case p.sync <- sync:
+        return <-sync
+    case <-p.term:
+        return errors.New("pool already terminated")
+    }
+}
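The loop above decouples reset requests from reset execution: the sync channel carries a waiter, resetForced and resetWaiter remember a pending request across busy resets, and the closed term channel lets Sync fail fast once the pool has shut down. Below is a self-contained sketch of that handshake; it is not geth code and it collapses the forced-reset bookkeeping into a single select, but it shows why a Sync caller cannot deadlock against a closing pool.

package main

import (
    "errors"
    "fmt"
    "time"
)

// resetLoop mimics the handshake added to TxPool.loop: sync carries a waiter
// channel for a forced reset, term is closed when the loop exits, and quit
// tears the loop down. The names mirror the diff, the body is only a sketch.
type resetLoop struct {
    sync chan chan error
    term chan struct{}
    quit chan chan error
}

func newResetLoop() *resetLoop {
    l := &resetLoop{
        sync: make(chan chan error),
        term: make(chan struct{}),
        quit: make(chan chan error),
    }
    go l.loop()
    return l
}

func (l *resetLoop) loop() {
    defer close(l.term) // closed term lets Sync callers detect a dead loop

    for {
        select {
        case waiter := <-l.sync:
            time.Sleep(10 * time.Millisecond) // stand-in for the real reset work
            waiter <- nil                     // reset finished, release the waiter
        case errc := <-l.quit:
            errc <- nil
            return
        }
    }
}

// Sync blocks until one reset has run, or reports that the loop is gone.
func (l *resetLoop) Sync() error {
    sync := make(chan error)
    select {
    case l.sync <- sync:
        return <-sync
    case <-l.term:
        return errors.New("pool already terminated")
    }
}

func main() {
    l := newResetLoop()
    fmt.Println("sync while running:", l.Sync()) // <nil>

    errc := make(chan error)
    l.quit <- errc
    <-errc
    fmt.Println("sync after close:", l.Sync()) // pool already terminated
}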
diff --git a/eth/catalyst/api.go b/eth/catalyst/api.go
index d7dfb3ec9..f02b5f362 100644
--- a/eth/catalyst/api.go
+++ b/eth/catalyst/api.go
@@ -180,7 +180,7 @@ func (api *ConsensusAPI) ForkchoiceUpdatedV1(update engine.ForkchoiceStateV1, pa
             return engine.STATUS_INVALID, engine.InvalidParams.With(errors.New("forkChoiceUpdateV1 called post-shanghai"))
         }
     }
-    return api.forkchoiceUpdated(update, payloadAttributes)
+    return api.forkchoiceUpdated(update, payloadAttributes, false)
 }
 
 // ForkchoiceUpdatedV2 is equivalent to V1 with the addition of withdrawals in the payload attributes.
@@ -196,7 +196,7 @@ func (api *ConsensusAPI) ForkchoiceUpdatedV2(update engine.ForkchoiceStateV1, pa
             return engine.STATUS_INVALID, engine.UnsupportedFork.With(errors.New("forkchoiceUpdatedV2 must only be called for shanghai payloads"))
         }
     }
-    return api.forkchoiceUpdated(update, params)
+    return api.forkchoiceUpdated(update, params, false)
 }
 
 // ForkchoiceUpdatedV3 is equivalent to V2 with the addition of parent beacon block root in the payload attributes.
@@ -220,10 +220,10 @@ func (api *ConsensusAPI) ForkchoiceUpdatedV3(update engine.ForkchoiceStateV1, pa
     // hash, even if params are wrong. To do this we need to split up
     // forkchoiceUpdate into a function that only updates the head and then a
     // function that kicks off block construction.
-    return api.forkchoiceUpdated(update, params)
+    return api.forkchoiceUpdated(update, params, false)
 }
 
-func (api *ConsensusAPI) forkchoiceUpdated(update engine.ForkchoiceStateV1, payloadAttributes *engine.PayloadAttributes) (engine.ForkChoiceResponse, error) {
+func (api *ConsensusAPI) forkchoiceUpdated(update engine.ForkchoiceStateV1, payloadAttributes *engine.PayloadAttributes, simulatorMode bool) (engine.ForkChoiceResponse, error) {
     api.forkchoiceLock.Lock()
     defer api.forkchoiceLock.Unlock()
 
@@ -330,7 +330,7 @@ func (api *ConsensusAPI) forkchoiceUpdated(update engine.ForkchoiceStateV1, payl
         if merger := api.eth.Merger(); !merger.PoSFinalized() {
             merger.FinalizePoS()
         }
-        // If the finalized block is not in our canonical tree, somethings wrong
+        // If the finalized block is not in our canonical tree, something is wrong
         finalBlock := api.eth.BlockChain().GetBlockByHash(update.FinalizedBlockHash)
         if finalBlock == nil {
             log.Warn("Final block not available in database", "hash", update.FinalizedBlockHash)
@@ -342,7 +342,7 @@ func (api *ConsensusAPI) forkchoiceUpdated(update engine.ForkchoiceStateV1, payl
         // Set the finalized block
         api.eth.BlockChain().SetFinalized(finalBlock.Header())
     }
-    // Check if the safe block hash is in our canonical tree, if not somethings wrong
+    // Check if the safe block hash is in our canonical tree, if not something is wrong
     if update.SafeBlockHash != (common.Hash{}) {
         safeBlock := api.eth.BlockChain().GetBlockByHash(update.SafeBlockHash)
         if safeBlock == nil {
@@ -374,6 +374,19 @@ func (api *ConsensusAPI) forkchoiceUpdated(update engine.ForkchoiceStateV1, payl
         if api.localBlocks.has(id) {
             return valid(&id), nil
         }
+        // If the beacon chain is run by a simulator, then transaction insertion,
+        // block insertion and block production will happen without any timing
+        // delay between them. This will cause flaky simulator executions due to
+        // the transaction pool running its internal reset operation on a back-
+        // ground thread. To avoid the racy behavior - in simulator mode - the
+        // pool will be explicitly blocked on its reset before continuing to the
+        // block production below.
+        if simulatorMode {
+            if err := api.eth.TxPool().Sync(); err != nil {
+                log.Error("Failed to sync transaction pool", "err", err)
+                return valid(nil), engine.InvalidPayloadAttributes.With(err)
+            }
+        }
         payload, err := api.eth.Miner().BuildPayload(args)
         if err != nil {
             log.Error("Failed to build payload", "err", err)
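In forkchoiceUpdated above, the new simulatorMode flag changes exactly one thing: before the miner is asked to build a payload, the transaction pool is forced through a blocking Sync so the payload deterministically contains every transaction submitted beforehand. A condensed sketch of that ordering follows; preparePayload, syncPool and buildPayload are illustrative stand-ins for the geth calls, not real identifiers.

package sketch

// preparePayload condenses the ordering that simulatorMode enforces: in a
// simulated beacon there is no wall-clock gap between transaction submission
// and block production, so the pool must finish any background reset before
// the payload is built. syncPool and buildPayload stand in for the TxPool
// and Miner calls and are assumptions of this sketch, not geth APIs.
func preparePayload(simulatorMode bool, syncPool func() error, buildPayload func() (any, error)) (any, error) {
    if simulatorMode {
        // Only the simulator blocks here; the live engine API path keeps the
        // pool reset on a background thread to avoid a DoS vector.
        if err := syncPool(); err != nil {
            return nil, err
        }
    }
    return buildPayload()
}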
diff --git a/eth/catalyst/simulated_beacon.go b/eth/catalyst/simulated_beacon.go
index 3c081074c..f55fe0813 100644
--- a/eth/catalyst/simulated_beacon.go
+++ b/eth/catalyst/simulated_beacon.go
@@ -155,12 +155,12 @@ func (c *SimulatedBeacon) sealBlock(withdrawals []*types.Withdrawal, timestamp u
 
     var random [32]byte
     rand.Read(random[:])
-    fcResponse, err := c.engineAPI.ForkchoiceUpdatedV2(c.curForkchoiceState, &engine.PayloadAttributes{
+    fcResponse, err := c.engineAPI.forkchoiceUpdated(c.curForkchoiceState, &engine.PayloadAttributes{
         Timestamp:             timestamp,
         SuggestedFeeRecipient: feeRecipient,
         Withdrawals:           withdrawals,
         Random:                random,
-    })
+    }, true)
     if err != nil {
         return err
     }
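The simulated beacon now bypasses the exported ForkchoiceUpdatedV2 and calls the unexported forkchoiceUpdated with simulatorMode set to true, so remote consensus clients keep the non-blocking engine API while the in-process simulator opts into the blocking txpool sync. A minimal sketch of that wrapper pattern, with illustrative names rather than the actual catalyst types:

package sketch

// engineAPI sketches the wrapper pattern from the diff: the exported engine
// API method stays non-blocking for remote consensus clients, while a trusted
// in-process caller (the simulated beacon) reaches the unexported entry point
// with the simulator flag set. The type and its field are illustrative only.
type engineAPI struct {
    forkchoiceUpdated func(simulatorMode bool) error
}

// ForkchoiceUpdatedV2 is what a real consensus client reaches over RPC.
func (api *engineAPI) ForkchoiceUpdatedV2() error {
    return api.forkchoiceUpdated(false)
}

// sealBlock is the simulator-side path: the same state transition, but it
// opts into the blocking txpool sync so block production stays deterministic.
func (api *engineAPI) sealBlock() error {
    return api.forkchoiceUpdated(true)
}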