eth/protocols/snap: snap sync testing (#22179)

* eth/protocols/snap: make timeout configurable * eth/protocols/snap: snap sync testing * eth/protocols/snap: test to trigger panic * eth/protocols/snap: fix race condition on timeouts * eth/protocols/snap: return error on cancelled sync * squashme: updates + test causing panic + properly serve accounts in order * eth/protocols/snap: revert failing storage response * eth/protocols/snap: revert on bad responses (storage, code) * eth/protocols/snap: fix account handling stall * eth/protocols/snap: fix remaining revertal-issues * eth/protocols/snap: timeouthandler for bytecode requests * eth/protocols/snap: debugging + fix log message * eth/protocols/snap: fix misspelliings in docs * eth/protocols/snap: fix race in bytecode handling * eth/protocols/snap: undo deduplication of storage roots * synctests: refactor + minify panic testcase * eth/protocols/snap: minor polishes * eth: minor polishes to make logs more useful * eth/protocols/snap: remove excessive logs from the test runs * eth/protocols/snap: stress tests with concurrency * eth/protocols/snap: further fixes to test cancel channel handling * eth/protocols/snap: extend test timeouts on CI Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2021-01-25 07:17:05 +01:00 · 2021-01-25 07:17:05 +01:00 · 797b0812ab
commit 797b0812ab
parent 3708454f58
6 changed files with 1251 additions and 150 deletions
--- a/eth/downloader/downloader.go
+++ b/eth/downloader/downloader.go
@ -298,7 +298,7 @@ func (d *Downloader) RegisterPeer(id string, version uint, peer Peer) error {
 		// Tests use short IDs, don't choke on them
 		logger = log.New("peer", id)
 	} else {
-		logger = log.New("peer", id[:16])
+		logger = log.New("peer", id[:8])
 	}
 	logger.Trace("Registering sync peer")
 	if err := d.peers.Register(newPeerConnection(id, version, peer, logger)); err != nil {
@ -325,7 +325,7 @@ func (d *Downloader) UnregisterPeer(id string) error {
 		// Tests use short IDs, don't choke on them
 		logger = log.New("peer", id)
 	} else {
-		logger = log.New("peer", id[:16])
+		logger = log.New("peer", id[:8])
 	}
 	logger.Trace("Unregistering sync peer")
 	if err := d.peers.Unregister(id); err != nil {
--- a/eth/handler.go
+++ b/eth/handler.go
@ -326,24 +326,32 @@ func (h *handler) runSnapPeer(peer *snap.Peer, handler snap.Handler) error {
 }

 func (h *handler) removePeer(id string) {
+	// Create a custom logger to avoid printing the entire id
+	var logger log.Logger
+	if len(id) < 16 {
+		// Tests use short IDs, don't choke on them
+		logger = log.New("peer", id)
+	} else {
+		logger = log.New("peer", id[:8])
+	}
 	// Remove the eth peer if it exists
 	eth := h.peers.ethPeer(id)
 	if eth != nil {
-		log.Debug("Removing Ethereum peer", "peer", id)
+		logger.Debug("Removing Ethereum peer")
 		h.downloader.UnregisterPeer(id)
 		h.txFetcher.Drop(id)

 		if err := h.peers.unregisterEthPeer(id); err != nil {
-			log.Error("Peer removal failed", "peer", id, "err", err)
+			logger.Error("Ethereum peer removal failed", "err", err)
 		}
 	}
 	// Remove the snap peer if it exists
 	snap := h.peers.snapPeer(id)
 	if snap != nil {
-		log.Debug("Removing Snapshot peer", "peer", id)
+		logger.Debug("Removing Snapshot peer")
 		h.downloader.SnapSyncer.Unregister(id)
 		if err := h.peers.unregisterSnapPeer(id); err != nil {
-			log.Error("Peer removal failed", "peer", id, "err", err)
+			logger.Error("Snapshot peer removel failed", "err", err)
 		}
 	}
 	// Hard disconnect at the networking layer
--- a/eth/protocols/snap/peer.go
+++ b/eth/protocols/snap/peer.go
@ -56,6 +56,11 @@ func (p *Peer) Version() uint {
 	return p.version
 }

+// Log overrides the P2P logger with the higher level one containing only the id.
+func (p *Peer) Log() log.Logger {
+	return p.logger
+}
+
 // RequestAccountRange fetches a batch of accounts rooted in a specific account
 // trie, starting with the origin.
 func (p *Peer) RequestAccountRange(id uint64, root common.Hash, origin, limit common.Hash, bytes uint64) error {
--- a/eth/protocols/snap/protocol.go
+++ b/eth/protocols/snap/protocol.go
@ -61,6 +61,7 @@ var (
 	errDecode         = errors.New("invalid message")
 	errInvalidMsgCode = errors.New("invalid message code")
 	errBadRequest     = errors.New("bad request")
+	errCancelled      = errors.New("sync cancelled")
 )

 // Packet represents a p2p message in the `snap` protocol.
--- a/eth/protocols/snap/sync.go
+++ b/eth/protocols/snap/sync.go
@ -73,10 +73,6 @@ const (
 	// waste bandwidth.
 	maxTrieRequestCount = 512

-	// requestTimeout is the maximum time a peer is allowed to spend on serving
-	// a single network request.
-	requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
-
 	// accountConcurrency is the number of chunks to split the account trie into
 	// to allow concurrent retrievals.
 	accountConcurrency = 16
@ -86,6 +82,12 @@ const (
 	storageConcurrency = 16
 )

+var (
+	// requestTimeout is the maximum time a peer is allowed to spend on serving
+	// a single network request.
+	requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
+)
+
 // accountRequest tracks a pending account range request to ensure responses are
 // to actual requests and to validate any security constraints.
 //
@ -331,6 +333,33 @@ type syncProgress struct {
 	BytecodeHealNops   uint64             // Number of bytecodes not requested
 }

+// SyncPeer abstracts out the methods required for a peer to be synced against
+// with the goal of allowing the construction of mock peers without the full
+// blown networking.
+type SyncPeer interface {
+	// ID retrieves the peer's unique identifier.
+	ID() string
+
+	// RequestAccountRange fetches a batch of accounts rooted in a specific account
+	// trie, starting with the origin.
+	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error
+
+	// RequestStorageRanges fetches a batch of storage slots belonging to one or
+	// more accounts. If slots from only one account are requested, an origin marker
+	// may also be used to retrieve from there.
+	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error
+
+	// RequestByteCodes fetches a batch of bytecodes by hash.
+	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error
+
+	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
+	// a specific state trie.
+	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error
+
+	// Log retrieves the peer's own contextual logger.
+	Log() log.Logger
+}
+
 // Syncer is an Ethereum account and storage trie syncer based on snapshots and
 // the  snap protocol. It's purpose is to download all the accounts and storage
 // slots from remote peers and reassemble chunks of the state trie, on top of
@ -348,10 +377,11 @@ type Syncer struct {

 	root    common.Hash    // Current state trie root being synced
 	tasks   []*accountTask // Current account task set being synced
+	snapped bool           // Flag to signal that snap phase is done
 	healer  *healTask      // Current state healing task being executed
 	update  chan struct{}  // Notification channel for possible sync progression

-	peers    map[string]*Peer // Currently active peers to download from
+	peers    map[string]SyncPeer // Currently active peers to download from
 	peerJoin *event.Feed         // Event feed to react to peers joining
 	peerDrop *event.Feed         // Event feed to react to peers dropping

@ -410,12 +440,14 @@ type Syncer struct {
 	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
 }

+// NewSyncer creates a new snapshot syncer to download the Ethereum state over the
+// snap protocol.
 func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer {
 	return &Syncer{
 		db:    db,
 		bloom: bloom,

-		peers:    make(map[string]*Peer),
+		peers:    make(map[string]SyncPeer),
 		peerJoin: new(event.Feed),
 		peerDrop: new(event.Feed),
 		update:   make(chan struct{}, 1),
@ -447,27 +479,29 @@ func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer {
 }

 // Register injects a new data source into the syncer's peerset.
-func (s *Syncer) Register(peer *Peer) error {
+func (s *Syncer) Register(peer SyncPeer) error {
 	// Make sure the peer is not registered yet
+	id := peer.ID()
+
 	s.lock.Lock()
-	if _, ok := s.peers[peer.id]; ok {
-		log.Error("Snap peer already registered", "id", peer.id)
+	if _, ok := s.peers[id]; ok {
+		log.Error("Snap peer already registered", "id", id)

 		s.lock.Unlock()
 		return errors.New("already registered")
 	}
-	s.peers[peer.id] = peer
+	s.peers[id] = peer

 	// Mark the peer as idle, even if no sync is running
-	s.accountIdlers[peer.id] = struct{}{}
-	s.storageIdlers[peer.id] = struct{}{}
-	s.bytecodeIdlers[peer.id] = struct{}{}
-	s.trienodeHealIdlers[peer.id] = struct{}{}
-	s.bytecodeHealIdlers[peer.id] = struct{}{}
+	s.accountIdlers[id] = struct{}{}
+	s.storageIdlers[id] = struct{}{}
+	s.bytecodeIdlers[id] = struct{}{}
+	s.trienodeHealIdlers[id] = struct{}{}
+	s.bytecodeHealIdlers[id] = struct{}{}
 	s.lock.Unlock()

 	// Notify any active syncs that a new peer can be assigned data
-	s.peerJoin.Send(peer.id)
+	s.peerJoin.Send(id)
 	return nil
 }

@ -566,6 +600,7 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
 		s.assignAccountTasks(cancel)
 		s.assignBytecodeTasks(cancel)
 		s.assignStorageTasks(cancel)
+
 		if len(s.tasks) == 0 {
 			// Sync phase done, run heal phase
 			s.assignTrienodeHealTasks(cancel)
@ -580,7 +615,7 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
 		case id := <-peerDrop:
 			s.revertRequests(id)
 		case <-cancel:
-			return nil
+			return errCancelled

 		case req := <-s.accountReqFails:
 			s.revertAccountRequest(req)
@ -622,6 +657,7 @@ func (s *Syncer) loadSyncStatus() {
 				log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
 			}
 			s.tasks = progress.Tasks
+			s.snapped = len(s.tasks) == 0

 			s.accountSynced = progress.AccountSynced
 			s.accountBytes = progress.AccountBytes
@ -701,6 +737,11 @@ func (s *Syncer) cleanAccountTasks() {
 			i--
 		}
 	}
+	if len(s.tasks) == 0 {
+		s.lock.Lock()
+		s.snapped = true
+		s.lock.Unlock()
+	}
 }

 // cleanStorageTasks iterates over all the account tasks and storage sub-tasks
@ -798,7 +839,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) {
 		delete(s.accountIdlers, idle)

 		s.pend.Add(1)
-		go func(peer *Peer, root common.Hash) {
+		go func(peer SyncPeer, root common.Hash) {
 			defer s.pend.Done()

 			// Attempt to send the remote request and revert if it fails
@ -885,7 +926,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) {
 		delete(s.bytecodeIdlers, idle)

 		s.pend.Add(1)
-		go func(peer *Peer) {
+		go func(peer SyncPeer) {
 			defer s.pend.Done()

 			// Attempt to send the remote request and revert if it fails
@ -962,7 +1003,6 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) {
 				// Found an incomplete storage chunk, schedule it
 				accounts = append(accounts, account)
 				roots = append(roots, st.root)
-
 				subtask = st
 				break // Large contract chunks are downloaded individually
 			}
@ -1010,7 +1050,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) {
 		delete(s.storageIdlers, idle)

 		s.pend.Add(1)
-		go func(peer *Peer, root common.Hash) {
+		go func(peer SyncPeer, root common.Hash) {
 			defer s.pend.Done()

 			// Attempt to send the remote request and revert if it fails
@ -1125,7 +1165,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) {
 		delete(s.trienodeHealIdlers, idle)

 		s.pend.Add(1)
-		go func(peer *Peer, root common.Hash) {
+		go func(peer SyncPeer, root common.Hash) {
 			defer s.pend.Done()

 			// Attempt to send the remote request and revert if it fails
@ -1223,7 +1263,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) {
 		delete(s.bytecodeHealIdlers, idle)

 		s.pend.Add(1)
-		go func(peer *Peer) {
+		go func(peer SyncPeer) {
 			defer s.pend.Done()

 			// Attempt to send the remote request and revert if it fails
@ -1522,7 +1562,7 @@ func (s *Syncer) processAccountResponse(res *accountResponse) {
 			break
 		}
 	}
-	// Itereate over all the accounts and assemble which ones need further sub-
+	// Iterate over all the accounts and assemble which ones need further sub-
 	// filling before the entire account range can be persisted.
 	res.task.needCode = make([]bool, len(res.accounts))
 	res.task.needState = make([]bool, len(res.accounts))
@ -1650,22 +1690,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 	)
 	// Iterate over all the accounts and reconstruct their storage tries from the
 	// delivered slots
-	delivered := make(map[common.Hash]bool)
-	for i := 0; i < len(res.hashes); i++ {
-		delivered[res.roots[i]] = true
-	}
 	for i, account := range res.accounts {
 		// If the account was not delivered, reschedule it
 		if i >= len(res.hashes) {
-			if !delivered[res.roots[i]] {
 			res.mainTask.stateTasks[account] = res.roots[i]
-			}
 			continue
 		}
 		// State was delivered, if complete mark as not needed any more, otherwise
 		// mark the account as needing healing
-		for j, acc := range res.mainTask.res.accounts {
-			if res.roots[i] == acc.Root {
+		for j, hash := range res.mainTask.res.hashes {
+			if account != hash {
+				continue
+			}
+			acc := res.mainTask.res.accounts[j]
+
 			// If the packet contains multiple contract storage slots, all
 			// but the last are surely complete. The last contract may be
 			// chunked, so check it's continuation flag.
@ -1740,7 +1778,6 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 				}
 			}
 		}
-		}
 		// Iterate over all the reconstructed trie nodes and push them to disk
 		slots += len(res.hashes[i])

@ -1941,7 +1978,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {

 // OnAccounts is a callback method to invoke when a range of accounts are
 // received from a remote peer.
-func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
+func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
 	size := common.StorageSize(len(hashes) * common.HashLength)
 	for _, account := range accounts {
 		size += common.StorageSize(len(account))
@ -1949,15 +1986,15 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account
 	for _, node := range proof {
 		size += common.StorageSize(len(node))
 	}
-	logger := peer.logger.New("reqid", id)
+	logger := peer.Log().New("reqid", id)
 	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)

 	// Whether or not the response is valid, we can mark the peer as idle and
 	// notify the scheduler to assign a new task. If the response is invalid,
 	// we'll drop the peer in a bit.
 	s.lock.Lock()
-	if _, ok := s.peers[peer.id]; ok {
-		s.accountIdlers[peer.id] = struct{}{}
+	if _, ok := s.peers[peer.ID()]; ok {
+		s.accountIdlers[peer.ID()] = struct{}{}
 	}
 	select {
 	case s.update <- struct{}{}:
@ -1975,7 +2012,11 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account

 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
-	req.timeout.Stop()
+	if !req.timeout.Stop() {
+		// The timeout is already triggered, and this request will be reverted+rescheduled
+		s.lock.Unlock()
+		return nil
+	}

 	// Response is valid, but check if peer is signalling that it does not have
 	// the requested data. For account range queries that means the state being
@ -1983,7 +2024,7 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account
 	// synced to our head.
 	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
 		logger.Debug("Peer rejected account range request", "root", s.root)
-		s.statelessPeers[peer.id] = struct{}{}
+		s.statelessPeers[peer.ID()] = struct{}{}
 		s.lock.Unlock()

 		// Signal this request as failed, and ready for rescheduling
@ -2011,6 +2052,8 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account
 	db, tr, notary, cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
 	if err != nil {
 		logger.Warn("Account range failed proof", "err", err)
+		// Signal this request as failed, and ready for rescheduling
+		s.scheduleRevertAccountRequest(req)
 		return err
 	}
 	// Partial trie reconstructed, send it to the scheduler for storage filling
@ -2050,9 +2093,9 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account

 // OnByteCodes is a callback method to invoke when a batch of contract
 // bytes codes are received from a remote peer.
-func (s *Syncer) OnByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {
+func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
 	s.lock.RLock()
-	syncing := len(s.tasks) > 0
+	syncing := !s.snapped
 	s.lock.RUnlock()

 	if syncing {
@ -2063,20 +2106,20 @@ func (s *Syncer) OnByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {

 // onByteCodes is a callback method to invoke when a batch of contract
 // bytes codes are received from a remote peer in the syncing phase.
-func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {
+func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
 	var size common.StorageSize
 	for _, code := range bytecodes {
 		size += common.StorageSize(len(code))
 	}
-	logger := peer.logger.New("reqid", id)
+	logger := peer.Log().New("reqid", id)
 	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)

 	// Whether or not the response is valid, we can mark the peer as idle and
 	// notify the scheduler to assign a new task. If the response is invalid,
 	// we'll drop the peer in a bit.
 	s.lock.Lock()
-	if _, ok := s.peers[peer.id]; ok {
-		s.bytecodeIdlers[peer.id] = struct{}{}
+	if _, ok := s.peers[peer.ID()]; ok {
+		s.bytecodeIdlers[peer.ID()] = struct{}{}
 	}
 	select {
 	case s.update <- struct{}{}:
@ -2094,14 +2137,18 @@ func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {

 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
-	req.timeout.Stop()
+	if !req.timeout.Stop() {
+		// The timeout is already triggered, and this request will be reverted+rescheduled
+		s.lock.Unlock()
+		return nil
+	}

 	// Response is valid, but check if peer is signalling that it does not have
 	// the requested data. For bytecode range queries that means the peer is not
 	// yet synced.
 	if len(bytecodes) == 0 {
 		logger.Debug("Peer rejected bytecode request")
-		s.statelessPeers[peer.id] = struct{}{}
+		s.statelessPeers[peer.ID()] = struct{}{}
 		s.lock.Unlock()

 		// Signal this request as failed, and ready for rescheduling
@ -2132,6 +2179,8 @@ func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {
 		}
 		// We've either ran out of hashes, or got unrequested data
 		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
+		// Signal this request as failed, and ready for rescheduling
+		s.scheduleRevertBytecodeRequest(req)
 		return errors.New("unexpected bytecode")
 	}
 	// Response validated, send it to the scheduler for filling
@ -2150,7 +2199,7 @@ func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {

 // OnStorage is a callback method to invoke when ranges of storage slots
 // are received from a remote peer.
-func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
+func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
 	// Gather some trace stats to aid in debugging issues
 	var (
 		hashCount int
@ -2170,15 +2219,15 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots
 	for _, node := range proof {
 		size += common.StorageSize(len(node))
 	}
-	logger := peer.logger.New("reqid", id)
+	logger := peer.Log().New("reqid", id)
 	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)

 	// Whether or not the response is valid, we can mark the peer as idle and
 	// notify the scheduler to assign a new task. If the response is invalid,
 	// we'll drop the peer in a bit.
 	s.lock.Lock()
-	if _, ok := s.peers[peer.id]; ok {
-		s.storageIdlers[peer.id] = struct{}{}
+	if _, ok := s.peers[peer.ID()]; ok {
+		s.storageIdlers[peer.ID()] = struct{}{}
 	}
 	select {
 	case s.update <- struct{}{}:
@ -2196,17 +2245,23 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots

 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
-	req.timeout.Stop()
+	if !req.timeout.Stop() {
+		// The timeout is already triggered, and this request will be reverted+rescheduled
+		s.lock.Unlock()
+		return nil
+	}

 	// Reject the response if the hash sets and slot sets don't match, or if the
 	// peer sent more data than requested.
 	if len(hashes) != len(slots) {
 		s.lock.Unlock()
+		s.scheduleRevertStorageRequest(req) // reschedule request
 		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
 		return errors.New("hash and slot set size mismatch")
 	}
 	if len(hashes) > len(req.accounts) {
 		s.lock.Unlock()
+		s.scheduleRevertStorageRequest(req) // reschedule request
 		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
 		return errors.New("hash set larger than requested")
 	}
@ -2216,11 +2271,9 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots
 	// synced to our head.
 	if len(hashes) == 0 {
 		logger.Debug("Peer rejected storage request")
-		s.statelessPeers[peer.id] = struct{}{}
+		s.statelessPeers[peer.ID()] = struct{}{}
 		s.lock.Unlock()
-
-		// Signal this request as failed, and ready for rescheduling
-		s.scheduleRevertStorageRequest(req)
+		s.scheduleRevertStorageRequest(req) // reschedule request
 		return nil
 	}
 	s.lock.Unlock()
@ -2250,6 +2303,7 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots
 			// space and hash to the origin root.
 			dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
 			if err != nil {
+				s.scheduleRevertStorageRequest(req) // reschedule request
 				logger.Warn("Storage slots failed proof", "err", err)
 				return err
 			}
@ -2264,6 +2318,7 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots
 			}
 			dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
 			if err != nil {
+				s.scheduleRevertStorageRequest(req) // reschedule request
 				logger.Warn("Storage range failed proof", "err", err)
 				return err
 			}
@ -2302,20 +2357,20 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots

 // OnTrieNodes is a callback method to invoke when a batch of trie nodes
 // are received from a remote peer.
-func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error {
+func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error {
 	var size common.StorageSize
 	for _, node := range trienodes {
 		size += common.StorageSize(len(node))
 	}
-	logger := peer.logger.New("reqid", id)
+	logger := peer.Log().New("reqid", id)
 	logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size)

 	// Whether or not the response is valid, we can mark the peer as idle and
 	// notify the scheduler to assign a new task. If the response is invalid,
 	// we'll drop the peer in a bit.
 	s.lock.Lock()
-	if _, ok := s.peers[peer.id]; ok {
-		s.trienodeHealIdlers[peer.id] = struct{}{}
+	if _, ok := s.peers[peer.ID()]; ok {
+		s.trienodeHealIdlers[peer.ID()] = struct{}{}
 	}
 	select {
 	case s.update <- struct{}{}:
@ -2333,14 +2388,18 @@ func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error {

 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
-	req.timeout.Stop()
+	if !req.timeout.Stop() {
+		// The timeout is already triggered, and this request will be reverted+rescheduled
+		s.lock.Unlock()
+		return nil
+	}

 	// Response is valid, but check if peer is signalling that it does not have
 	// the requested data. For bytecode range queries that means the peer is not
 	// yet synced.
 	if len(trienodes) == 0 {
 		logger.Debug("Peer rejected trienode heal request")
-		s.statelessPeers[peer.id] = struct{}{}
+		s.statelessPeers[peer.ID()] = struct{}{}
 		s.lock.Unlock()

 		// Signal this request as failed, and ready for rescheduling
@ -2371,6 +2430,8 @@ func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error {
 		}
 		// We've either ran out of hashes, or got unrequested data
 		logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
+		// Signal this request as failed, and ready for rescheduling
+		s.scheduleRevertTrienodeHealRequest(req)
 		return errors.New("unexpected healing trienode")
 	}
 	// Response validated, send it to the scheduler for filling
@ -2390,20 +2451,20 @@ func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error {

 // onHealByteCodes is a callback method to invoke when a batch of contract
 // bytes codes are received from a remote peer in the healing phase.
-func (s *Syncer) onHealByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error {
+func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
 	var size common.StorageSize
 	for _, code := range bytecodes {
 		size += common.StorageSize(len(code))
 	}
-	logger := peer.logger.New("reqid", id)
+	logger := peer.Log().New("reqid", id)
 	logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)

 	// Whether or not the response is valid, we can mark the peer as idle and
 	// notify the scheduler to assign a new task. If the response is invalid,
 	// we'll drop the peer in a bit.
 	s.lock.Lock()
-	if _, ok := s.peers[peer.id]; ok {
-		s.bytecodeHealIdlers[peer.id] = struct{}{}
+	if _, ok := s.peers[peer.ID()]; ok {
+		s.bytecodeHealIdlers[peer.ID()] = struct{}{}
 	}
 	select {
 	case s.update <- struct{}{}:
@ -2421,14 +2482,18 @@ func (s *Syncer) onHealByteCodes(peer *Peer, id uint64, bytecodes [][]byte) erro

 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
-	req.timeout.Stop()
+	if !req.timeout.Stop() {
+		// The timeout is already triggered, and this request will be reverted+rescheduled
+		s.lock.Unlock()
+		return nil
+	}

 	// Response is valid, but check if peer is signalling that it does not have
 	// the requested data. For bytecode range queries that means the peer is not
 	// yet synced.
 	if len(bytecodes) == 0 {
 		logger.Debug("Peer rejected bytecode heal request")
-		s.statelessPeers[peer.id] = struct{}{}
+		s.statelessPeers[peer.ID()] = struct{}{}
 		s.lock.Unlock()

 		// Signal this request as failed, and ready for rescheduling
@ -2459,6 +2524,8 @@ func (s *Syncer) onHealByteCodes(peer *Peer, id uint64, bytecodes [][]byte) erro
 		}
 		// We've either ran out of hashes, or got unrequested data
 		logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
+		// Signal this request as failed, and ready for rescheduling
+		s.scheduleRevertBytecodeHealRequest(req)
 		return errors.New("unexpected healing bytecode")
 	}
 	// Response validated, send it to the scheduler for filling
--- a/eth/protocols/snap/sync_test.go
+++ b/eth/protocols/snap/sync_test.go