Merge pull request #22678 from karalabe/snap-ephemeral-channels

eth/protocols/snap: use ephemeral channels to avoid cross-sync delveries
This commit is contained in:
Péter Szilágyi 2021-04-16 09:27:39 +03:00 committed by GitHub
commit f8afb681dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -106,6 +106,8 @@ type accountRequest struct {
peer string // Peer to which this request is assigned peer string // Peer to which this request is assigned
id uint64 // Request ID of this request id uint64 // Request ID of this request
deliver chan *accountResponse // Channel to deliver successful response on
revert chan *accountRequest // Channel to deliver request failure on
cancel chan struct{} // Channel to track sync cancellation cancel chan struct{} // Channel to track sync cancellation
timeout *time.Timer // Timer to track delivery timeout timeout *time.Timer // Timer to track delivery timeout
stale chan struct{} // Channel to signal the request was dropped stale chan struct{} // Channel to signal the request was dropped
@ -147,6 +149,8 @@ type bytecodeRequest struct {
peer string // Peer to which this request is assigned peer string // Peer to which this request is assigned
id uint64 // Request ID of this request id uint64 // Request ID of this request
deliver chan *bytecodeResponse // Channel to deliver successful response on
revert chan *bytecodeRequest // Channel to deliver request failure on
cancel chan struct{} // Channel to track sync cancellation cancel chan struct{} // Channel to track sync cancellation
timeout *time.Timer // Timer to track delivery timeout timeout *time.Timer // Timer to track delivery timeout
stale chan struct{} // Channel to signal the request was dropped stale chan struct{} // Channel to signal the request was dropped
@ -176,6 +180,8 @@ type storageRequest struct {
peer string // Peer to which this request is assigned peer string // Peer to which this request is assigned
id uint64 // Request ID of this request id uint64 // Request ID of this request
deliver chan *storageResponse // Channel to deliver successful response on
revert chan *storageRequest // Channel to deliver request failure on
cancel chan struct{} // Channel to track sync cancellation cancel chan struct{} // Channel to track sync cancellation
timeout *time.Timer // Timer to track delivery timeout timeout *time.Timer // Timer to track delivery timeout
stale chan struct{} // Channel to signal the request was dropped stale chan struct{} // Channel to signal the request was dropped
@ -224,6 +230,8 @@ type trienodeHealRequest struct {
peer string // Peer to which this request is assigned peer string // Peer to which this request is assigned
id uint64 // Request ID of this request id uint64 // Request ID of this request
deliver chan *trienodeHealResponse // Channel to deliver successful response on
revert chan *trienodeHealRequest // Channel to deliver request failure on
cancel chan struct{} // Channel to track sync cancellation cancel chan struct{} // Channel to track sync cancellation
timeout *time.Timer // Timer to track delivery timeout timeout *time.Timer // Timer to track delivery timeout
stale chan struct{} // Channel to signal the request was dropped stale chan struct{} // Channel to signal the request was dropped
@ -256,6 +264,8 @@ type bytecodeHealRequest struct {
peer string // Peer to which this request is assigned peer string // Peer to which this request is assigned
id uint64 // Request ID of this request id uint64 // Request ID of this request
deliver chan *bytecodeHealResponse // Channel to deliver successful response on
revert chan *bytecodeHealRequest // Channel to deliver request failure on
cancel chan struct{} // Channel to track sync cancellation cancel chan struct{} // Channel to track sync cancellation
timeout *time.Timer // Timer to track delivery timeout timeout *time.Timer // Timer to track delivery timeout
stale chan struct{} // Channel to signal the request was dropped stale chan struct{} // Channel to signal the request was dropped
@ -399,14 +409,6 @@ type Syncer struct {
bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
storageReqs map[uint64]*storageRequest // Storage requests currently running storageReqs map[uint64]*storageRequest // Storage requests currently running
accountReqFails chan *accountRequest // Failed account range requests to revert
bytecodeReqFails chan *bytecodeRequest // Failed bytecode requests to revert
storageReqFails chan *storageRequest // Failed storage requests to revert
accountResps chan *accountResponse // Account sub-tries to integrate into the database
bytecodeResps chan *bytecodeResponse // Bytecodes to integrate into the database
storageResps chan *storageResponse // Storage sub-tries to integrate into the database
accountSynced uint64 // Number of accounts downloaded accountSynced uint64 // Number of accounts downloaded
accountBytes common.StorageSize // Number of account trie bytes persisted to disk accountBytes common.StorageSize // Number of account trie bytes persisted to disk
bytecodeSynced uint64 // Number of bytecodes downloaded bytecodeSynced uint64 // Number of bytecodes downloaded
@ -421,12 +423,6 @@ type Syncer struct {
trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running
trienodeHealReqFails chan *trienodeHealRequest // Failed trienode requests to revert
bytecodeHealReqFails chan *bytecodeHealRequest // Failed bytecode requests to revert
trienodeHealResps chan *trienodeHealResponse // Trie nodes to integrate into the database
bytecodeHealResps chan *bytecodeHealResponse // Bytecodes to integrate into the database
trienodeHealSynced uint64 // Number of state trie nodes downloaded trienodeHealSynced uint64 // Number of state trie nodes downloaded
trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk
trienodeHealDups uint64 // Number of state trie nodes already processed trienodeHealDups uint64 // Number of state trie nodes already processed
@ -467,22 +463,12 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer {
accountReqs: make(map[uint64]*accountRequest), accountReqs: make(map[uint64]*accountRequest),
storageReqs: make(map[uint64]*storageRequest), storageReqs: make(map[uint64]*storageRequest),
bytecodeReqs: make(map[uint64]*bytecodeRequest), bytecodeReqs: make(map[uint64]*bytecodeRequest),
accountReqFails: make(chan *accountRequest),
storageReqFails: make(chan *storageRequest),
bytecodeReqFails: make(chan *bytecodeRequest),
accountResps: make(chan *accountResponse),
storageResps: make(chan *storageResponse),
bytecodeResps: make(chan *bytecodeResponse),
trienodeHealIdlers: make(map[string]struct{}), trienodeHealIdlers: make(map[string]struct{}),
bytecodeHealIdlers: make(map[string]struct{}), bytecodeHealIdlers: make(map[string]struct{}),
trienodeHealReqs: make(map[uint64]*trienodeHealRequest), trienodeHealReqs: make(map[uint64]*trienodeHealRequest),
bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest),
trienodeHealReqFails: make(chan *trienodeHealRequest),
bytecodeHealReqFails: make(chan *bytecodeHealRequest),
trienodeHealResps: make(chan *trienodeHealResponse),
bytecodeHealResps: make(chan *bytecodeHealResponse),
stateWriter: db.NewBatch(), stateWriter: db.NewBatch(),
} }
} }
@ -611,6 +597,21 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
peerDropSub := s.peerDrop.Subscribe(peerDrop) peerDropSub := s.peerDrop.Subscribe(peerDrop)
defer peerDropSub.Unsubscribe() defer peerDropSub.Unsubscribe()
// Create a set of unique channels for this sync cycle. We need these to be
// ephemeral so a data race doesn't accidentally deliver something stale on
// a persistent channel across syncs (yup, this happened)
var (
accountReqFails = make(chan *accountRequest)
storageReqFails = make(chan *storageRequest)
bytecodeReqFails = make(chan *bytecodeRequest)
accountResps = make(chan *accountResponse)
storageResps = make(chan *storageResponse)
bytecodeResps = make(chan *bytecodeResponse)
trienodeHealReqFails = make(chan *trienodeHealRequest)
bytecodeHealReqFails = make(chan *bytecodeHealRequest)
trienodeHealResps = make(chan *trienodeHealResponse)
bytecodeHealResps = make(chan *bytecodeHealResponse)
)
for { for {
// Remove all completed tasks and terminate sync if everything's done // Remove all completed tasks and terminate sync if everything's done
s.cleanStorageTasks() s.cleanStorageTasks()
@ -619,14 +620,14 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
return nil return nil
} }
// Assign all the data retrieval tasks to any free peers // Assign all the data retrieval tasks to any free peers
s.assignAccountTasks(cancel) s.assignAccountTasks(accountResps, accountReqFails, cancel)
s.assignBytecodeTasks(cancel) s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
s.assignStorageTasks(cancel) s.assignStorageTasks(storageResps, storageReqFails, cancel)
if len(s.tasks) == 0 { if len(s.tasks) == 0 {
// Sync phase done, run heal phase // Sync phase done, run heal phase
s.assignTrienodeHealTasks(cancel) s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
s.assignBytecodeHealTasks(cancel) s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
} }
// Wait for something to happen // Wait for something to happen
select { select {
@ -639,26 +640,26 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
case <-cancel: case <-cancel:
return ErrCancelled return ErrCancelled
case req := <-s.accountReqFails: case req := <-accountReqFails:
s.revertAccountRequest(req) s.revertAccountRequest(req)
case req := <-s.bytecodeReqFails: case req := <-bytecodeReqFails:
s.revertBytecodeRequest(req) s.revertBytecodeRequest(req)
case req := <-s.storageReqFails: case req := <-storageReqFails:
s.revertStorageRequest(req) s.revertStorageRequest(req)
case req := <-s.trienodeHealReqFails: case req := <-trienodeHealReqFails:
s.revertTrienodeHealRequest(req) s.revertTrienodeHealRequest(req)
case req := <-s.bytecodeHealReqFails: case req := <-bytecodeHealReqFails:
s.revertBytecodeHealRequest(req) s.revertBytecodeHealRequest(req)
case res := <-s.accountResps: case res := <-accountResps:
s.processAccountResponse(res) s.processAccountResponse(res)
case res := <-s.bytecodeResps: case res := <-bytecodeResps:
s.processBytecodeResponse(res) s.processBytecodeResponse(res)
case res := <-s.storageResps: case res := <-storageResps:
s.processStorageResponse(res) s.processStorageResponse(res)
case res := <-s.trienodeHealResps: case res := <-trienodeHealResps:
s.processTrienodeHealResponse(res) s.processTrienodeHealResponse(res)
case res := <-s.bytecodeHealResps: case res := <-bytecodeHealResps:
s.processBytecodeHealResponse(res) s.processBytecodeHealResponse(res)
} }
// Report stats if something meaningful happened // Report stats if something meaningful happened
@ -801,7 +802,7 @@ func (s *Syncer) cleanStorageTasks() {
// assignAccountTasks attempts to match idle peers to pending account range // assignAccountTasks attempts to match idle peers to pending account range
// retrievals. // retrievals.
func (s *Syncer) assignAccountTasks(cancel chan struct{}) { func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
s.lock.Lock() s.lock.Lock()
defer s.lock.Unlock() defer s.lock.Unlock()
@ -849,6 +850,8 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) {
req := &accountRequest{ req := &accountRequest{
peer: idle, peer: idle,
id: reqid, id: reqid,
deliver: success,
revert: fail,
cancel: cancel, cancel: cancel,
stale: make(chan struct{}), stale: make(chan struct{}),
origin: task.Next, origin: task.Next,
@ -879,7 +882,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) {
} }
// assignBytecodeTasks attempts to match idle peers to pending code retrievals. // assignBytecodeTasks attempts to match idle peers to pending code retrievals.
func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
s.lock.Lock() s.lock.Lock()
defer s.lock.Unlock() defer s.lock.Unlock()
@ -939,6 +942,8 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) {
req := &bytecodeRequest{ req := &bytecodeRequest{
peer: idle, peer: idle,
id: reqid, id: reqid,
deliver: success,
revert: fail,
cancel: cancel, cancel: cancel,
stale: make(chan struct{}), stale: make(chan struct{}),
hashes: hashes, hashes: hashes,
@ -966,7 +971,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) {
// assignStorageTasks attempts to match idle peers to pending storage range // assignStorageTasks attempts to match idle peers to pending storage range
// retrievals. // retrievals.
func (s *Syncer) assignStorageTasks(cancel chan struct{}) { func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
s.lock.Lock() s.lock.Lock()
defer s.lock.Unlock() defer s.lock.Unlock()
@ -1059,6 +1064,8 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) {
req := &storageRequest{ req := &storageRequest{
peer: idle, peer: idle,
id: reqid, id: reqid,
deliver: success,
revert: fail,
cancel: cancel, cancel: cancel,
stale: make(chan struct{}), stale: make(chan struct{}),
accounts: accounts, accounts: accounts,
@ -1101,7 +1108,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) {
// assignTrienodeHealTasks attempts to match idle peers to trie node requests to // assignTrienodeHealTasks attempts to match idle peers to trie node requests to
// heal any trie errors caused by the snap sync's chunked retrieval model. // heal any trie errors caused by the snap sync's chunked retrieval model.
func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
s.lock.Lock() s.lock.Lock()
defer s.lock.Unlock() defer s.lock.Unlock()
@ -1181,6 +1188,8 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) {
req := &trienodeHealRequest{ req := &trienodeHealRequest{
peer: idle, peer: idle,
id: reqid, id: reqid,
deliver: success,
revert: fail,
cancel: cancel, cancel: cancel,
stale: make(chan struct{}), stale: make(chan struct{}),
hashes: hashes, hashes: hashes,
@ -1209,7 +1218,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) {
// assignBytecodeHealTasks attempts to match idle peers to bytecode requests to // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
// heal any trie errors caused by the snap sync's chunked retrieval model. // heal any trie errors caused by the snap sync's chunked retrieval model.
func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
s.lock.Lock() s.lock.Lock()
defer s.lock.Unlock() defer s.lock.Unlock()
@ -1282,6 +1291,8 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) {
req := &bytecodeHealRequest{ req := &bytecodeHealRequest{
peer: idle, peer: idle,
id: reqid, id: reqid,
deliver: success,
revert: fail,
cancel: cancel, cancel: cancel,
stale: make(chan struct{}), stale: make(chan struct{}),
hashes: hashes, hashes: hashes,
@ -1366,7 +1377,7 @@ func (s *Syncer) revertRequests(peer string) {
// request and return all failed retrieval tasks to the scheduler for reassignment. // request and return all failed retrieval tasks to the scheduler for reassignment.
func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
select { select {
case s.accountReqFails <- req: case req.revert <- req:
// Sync event loop notified // Sync event loop notified
case <-req.cancel: case <-req.cancel:
// Sync cycle got cancelled // Sync cycle got cancelled
@ -1407,7 +1418,7 @@ func (s *Syncer) revertAccountRequest(req *accountRequest) {
// and return all failed retrieval tasks to the scheduler for reassignment. // and return all failed retrieval tasks to the scheduler for reassignment.
func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) {
select { select {
case s.bytecodeReqFails <- req: case req.revert <- req:
// Sync event loop notified // Sync event loop notified
case <-req.cancel: case <-req.cancel:
// Sync cycle got cancelled // Sync cycle got cancelled
@ -1448,7 +1459,7 @@ func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
// request and return all failed retrieval tasks to the scheduler for reassignment. // request and return all failed retrieval tasks to the scheduler for reassignment.
func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) {
select { select {
case s.storageReqFails <- req: case req.revert <- req:
// Sync event loop notified // Sync event loop notified
case <-req.cancel: case <-req.cancel:
// Sync cycle got cancelled // Sync cycle got cancelled
@ -1493,7 +1504,7 @@ func (s *Syncer) revertStorageRequest(req *storageRequest) {
// request and return all failed retrieval tasks to the scheduler for reassignment. // request and return all failed retrieval tasks to the scheduler for reassignment.
func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) {
select { select {
case s.trienodeHealReqFails <- req: case req.revert <- req:
// Sync event loop notified // Sync event loop notified
case <-req.cancel: case <-req.cancel:
// Sync cycle got cancelled // Sync cycle got cancelled
@ -1534,7 +1545,7 @@ func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
// request and return all failed retrieval tasks to the scheduler for reassignment. // request and return all failed retrieval tasks to the scheduler for reassignment.
func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
select { select {
case s.bytecodeHealReqFails <- req: case req.revert <- req:
// Sync event loop notified // Sync event loop notified
case <-req.cancel: case <-req.cancel:
// Sync cycle got cancelled // Sync cycle got cancelled
@ -2147,7 +2158,7 @@ func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, acco
cont: cont, cont: cont,
} }
select { select {
case s.accountResps <- response: case req.deliver <- response:
case <-req.cancel: case <-req.cancel:
case <-req.stale: case <-req.stale:
} }
@ -2253,7 +2264,7 @@ func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error
codes: codes, codes: codes,
} }
select { select {
case s.bytecodeResps <- response: case req.deliver <- response:
case <-req.cancel: case <-req.cancel:
case <-req.stale: case <-req.stale:
} }
@ -2411,7 +2422,7 @@ func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slo
cont: cont, cont: cont,
} }
select { select {
case s.storageResps <- response: case req.deliver <- response:
case <-req.cancel: case <-req.cancel:
case <-req.stale: case <-req.stale:
} }
@ -2505,7 +2516,7 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error
nodes: nodes, nodes: nodes,
} }
select { select {
case s.trienodeHealResps <- response: case req.deliver <- response:
case <-req.cancel: case <-req.cancel:
case <-req.stale: case <-req.stale:
} }
@ -2598,7 +2609,7 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e
codes: codes, codes: codes,
} }
select { select {
case s.bytecodeHealResps <- response: case req.deliver <- response:
case <-req.cancel: case <-req.cancel:
case <-req.stale: case <-req.stale:
} }