plugeth/eth/downloader/beaconsync.go
rjl493456442 5c67066a05
eth/downloader: fix skeleton cleanup (#28581)
* eth/downloader: fix skeleton cleanup

* eth/downloader: short circuit if nothing to delete

* eth/downloader: polish the logic in cleanup

* eth/downloader: address comments
2024-01-31 10:57:33 +02:00

391 lines
15 KiB
Go

// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package downloader
import (
"fmt"
"sync"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/log"
)
// beaconBackfiller is the chain and state backfilling that can be commenced once
// the skeleton syncer has successfully reverse downloaded all the headers up to
// the genesis block or an existing header in the database. Its operation is fully
// directed by the skeleton sync's head/tail events.
type beaconBackfiller struct {
downloader *Downloader // Downloader to direct via this callback implementation
syncMode SyncMode // Sync mode to use for backfilling the skeleton chains
success func() // Callback to run on successful sync cycle completion
filling bool // Flag whether the downloader is backfilling or not
filled *types.Header // Last header filled by the last terminated sync loop
started chan struct{} // Notification channel whether the downloader inited
lock sync.Mutex // Mutex protecting the sync lock
}
// newBeaconBackfiller is a helper method to create the backfiller.
func newBeaconBackfiller(dl *Downloader, success func()) backfiller {
return &beaconBackfiller{
downloader: dl,
success: success,
}
}
// suspend cancels any background downloader threads and returns the last header
// that has been successfully backfilled (potentially in a previous run), or the
// genesis.
func (b *beaconBackfiller) suspend() *types.Header {
// If no filling is running, don't waste cycles
b.lock.Lock()
filling := b.filling
filled := b.filled
started := b.started
b.lock.Unlock()
if !filling {
return filled // Return the filled header on the previous sync completion
}
// A previous filling should be running, though it may happen that it hasn't
// yet started (being done on a new goroutine). Many concurrent beacon head
// announcements can lead to sync start/stop thrashing. In that case we need
// to wait for initialization before we can safely cancel it. It is safe to
// read this channel multiple times, it gets closed on startup.
<-started
// Now that we're sure the downloader successfully started up, we can cancel
// it safely without running the risk of data races.
b.downloader.Cancel()
// Sync cycle was just terminated, retrieve and return the last filled header.
// Can't use `filled` as that contains a stale value from before cancellation.
return b.downloader.blockchain.CurrentSnapBlock()
}
// resume starts the downloader threads for backfilling state and chain data.
func (b *beaconBackfiller) resume() {
b.lock.Lock()
if b.filling {
// If a previous filling cycle is still running, just ignore this start
// request. // TODO(karalabe): We should make this channel driven
b.lock.Unlock()
return
}
b.filling = true
b.filled = nil
b.started = make(chan struct{})
mode := b.syncMode
b.lock.Unlock()
// Start the backfilling on its own thread since the downloader does not have
// its own lifecycle runloop.
go func() {
// Set the backfiller to non-filling when download completes
defer func() {
b.lock.Lock()
b.filling = false
b.filled = b.downloader.blockchain.CurrentSnapBlock()
b.lock.Unlock()
}()
// If the downloader fails, report an error as in beacon chain mode there
// should be no errors as long as the chain we're syncing to is valid.
if err := b.downloader.synchronise("", common.Hash{}, nil, nil, mode, true, b.started); err != nil {
log.Error("Beacon backfilling failed", "err", err)
return
}
// Synchronization succeeded. Since this happens async, notify the outer
// context to disable snap syncing and enable transaction propagation.
if b.success != nil {
b.success()
}
}()
}
// setMode updates the sync mode from the current one to the requested one. If
// there's an active sync in progress, it will be cancelled and restarted.
func (b *beaconBackfiller) setMode(mode SyncMode) {
// Update the old sync mode and track if it was changed
b.lock.Lock()
updated := b.syncMode != mode
filling := b.filling
b.syncMode = mode
b.lock.Unlock()
// If the sync mode was changed mid-sync, restart. This should never ever
// really happen, we just handle it to detect programming errors.
if !updated || !filling {
return
}
log.Error("Downloader sync mode changed mid-run", "old", mode.String(), "new", mode.String())
b.suspend()
b.resume()
}
// SetBadBlockCallback sets the callback to run when a bad block is hit by the
// block processor. This method is not thread safe and should be set only once
// on startup before system events are fired.
func (d *Downloader) SetBadBlockCallback(onBadBlock badBlockFn) {
d.badBlock = onBadBlock
}
// BeaconSync is the post-merge version of the chain synchronization, where the
// chain is not downloaded from genesis onward, rather from trusted head announces
// backwards.
//
// Internally backfilling and state sync is done the same way, but the header
// retrieval and scheduling is replaced.
func (d *Downloader) BeaconSync(mode SyncMode, head *types.Header, final *types.Header) error {
return d.beaconSync(mode, head, final, true)
}
// BeaconExtend is an optimistic version of BeaconSync, where an attempt is made
// to extend the current beacon chain with a new header, but in case of a mismatch,
// the old sync will not be terminated and reorged, rather the new head is dropped.
//
// This is useful if a beacon client is feeding us large chunks of payloads to run,
// but is not setting the head after each.
func (d *Downloader) BeaconExtend(mode SyncMode, head *types.Header) error {
return d.beaconSync(mode, head, nil, false)
}
// beaconSync is the post-merge version of the chain synchronization, where the
// chain is not downloaded from genesis onward, rather from trusted head announces
// backwards.
//
// Internally backfilling and state sync is done the same way, but the header
// retrieval and scheduling is replaced.
func (d *Downloader) beaconSync(mode SyncMode, head *types.Header, final *types.Header, force bool) error {
// When the downloader starts a sync cycle, it needs to be aware of the sync
// mode to use (full, snap). To keep the skeleton chain oblivious, inject the
// mode into the backfiller directly.
//
// Super crazy dangerous type cast. Should be fine (TM), we're only using a
// different backfiller implementation for skeleton tests.
d.skeleton.filler.(*beaconBackfiller).setMode(mode)
// Signal the skeleton sync to switch to a new head, however it wants
if err := d.skeleton.Sync(head, final, force); err != nil {
return err
}
return nil
}
// findBeaconAncestor tries to locate the common ancestor link of the local chain
// and the beacon chain just requested. In the general case when our node was in
// sync and on the correct chain, checking the top N links should already get us
// a match. In the rare scenario when we ended up on a long reorganisation (i.e.
// none of the head links match), we do a binary search to find the ancestor.
func (d *Downloader) findBeaconAncestor() (uint64, error) {
// Figure out the current local head position
var chainHead *types.Header
switch d.getMode() {
case FullSync:
chainHead = d.blockchain.CurrentBlock()
case SnapSync:
chainHead = d.blockchain.CurrentSnapBlock()
default:
chainHead = d.lightchain.CurrentHeader()
}
number := chainHead.Number.Uint64()
// Retrieve the skeleton bounds and ensure they are linked to the local chain
beaconHead, beaconTail, _, err := d.skeleton.Bounds()
if err != nil {
// This is a programming error. The chain backfiller was called with an
// invalid beacon sync state. Ideally we would panic here, but erroring
// gives us at least a remote chance to recover. It's still a big fault!
log.Error("Failed to retrieve beacon bounds", "err", err)
return 0, err
}
var linked bool
switch d.getMode() {
case FullSync:
linked = d.blockchain.HasBlock(beaconTail.ParentHash, beaconTail.Number.Uint64()-1)
case SnapSync:
linked = d.blockchain.HasFastBlock(beaconTail.ParentHash, beaconTail.Number.Uint64()-1)
default:
linked = d.blockchain.HasHeader(beaconTail.ParentHash, beaconTail.Number.Uint64()-1)
}
if !linked {
// This is a programming error. The chain backfiller was called with a
// tail that's not linked to the local chain. Whilst this should never
// happen, there might be some weirdnesses if beacon sync backfilling
// races with the user (or beacon client) calling setHead. Whilst panic
// would be the ideal thing to do, it is safer long term to attempt a
// recovery and fix any noticed issue after the fact.
log.Error("Beacon sync linkup unavailable", "number", beaconTail.Number.Uint64()-1, "hash", beaconTail.ParentHash)
return 0, fmt.Errorf("beacon linkup unavailable locally: %d [%x]", beaconTail.Number.Uint64()-1, beaconTail.ParentHash)
}
// Binary search to find the ancestor
start, end := beaconTail.Number.Uint64()-1, number
if number := beaconHead.Number.Uint64(); end > number {
// This shouldn't really happen in a healthy network, but if the consensus
// clients feeds us a shorter chain as the canonical, we should not attempt
// to access non-existent skeleton items.
log.Warn("Beacon head lower than local chain", "beacon", number, "local", end)
end = number
}
for start+1 < end {
// Split our chain interval in two, and request the hash to cross check
check := (start + end) / 2
h := d.skeleton.Header(check)
n := h.Number.Uint64()
var known bool
switch d.getMode() {
case FullSync:
known = d.blockchain.HasBlock(h.Hash(), n)
case SnapSync:
known = d.blockchain.HasFastBlock(h.Hash(), n)
default:
known = d.lightchain.HasHeader(h.Hash(), n)
}
if !known {
end = check
continue
}
start = check
}
return start, nil
}
// fetchBeaconHeaders feeds skeleton headers to the downloader queue for scheduling
// until sync errors or is finished.
func (d *Downloader) fetchBeaconHeaders(from uint64) error {
var head *types.Header
_, tail, _, err := d.skeleton.Bounds()
if err != nil {
return err
}
// A part of headers are not in the skeleton space, try to resolve
// them from the local chain. Note the range should be very short
// and it should only happen when there are less than 64 post-merge
// blocks in the network.
var localHeaders []*types.Header
if from < tail.Number.Uint64() {
count := tail.Number.Uint64() - from
if count > uint64(fsMinFullBlocks) {
return fmt.Errorf("invalid origin (%d) of beacon sync (%d)", from, tail.Number)
}
localHeaders = d.readHeaderRange(tail, int(count))
log.Warn("Retrieved beacon headers from local", "from", from, "count", count)
}
for {
// Some beacon headers might have appeared since the last cycle, make
// sure we're always syncing to all available ones
head, _, _, err = d.skeleton.Bounds()
if err != nil {
return err
}
// If the pivot became stale (older than 2*64-8 (bit of wiggle room)),
// move it ahead to HEAD-64
d.pivotLock.Lock()
if d.pivotHeader != nil {
if head.Number.Uint64() > d.pivotHeader.Number.Uint64()+2*uint64(fsMinFullBlocks)-8 {
// Retrieve the next pivot header, either from skeleton chain
// or the filled chain
number := head.Number.Uint64() - uint64(fsMinFullBlocks)
log.Warn("Pivot seemingly stale, moving", "old", d.pivotHeader.Number, "new", number)
if d.pivotHeader = d.skeleton.Header(number); d.pivotHeader == nil {
if number < tail.Number.Uint64() {
dist := tail.Number.Uint64() - number
if len(localHeaders) >= int(dist) {
d.pivotHeader = localHeaders[dist-1]
log.Warn("Retrieved pivot header from local", "number", d.pivotHeader.Number, "hash", d.pivotHeader.Hash(), "latest", head.Number, "oldest", tail.Number)
}
}
}
// Print an error log and return directly in case the pivot header
// is still not found. It means the skeleton chain is not linked
// correctly with local chain.
if d.pivotHeader == nil {
log.Error("Pivot header is not found", "number", number)
d.pivotLock.Unlock()
return errNoPivotHeader
}
// Write out the pivot into the database so a rollback beyond
// it will reenable snap sync and update the state root that
// the state syncer will be downloading
rawdb.WriteLastPivotNumber(d.stateDB, d.pivotHeader.Number.Uint64())
}
}
d.pivotLock.Unlock()
// Retrieve a batch of headers and feed it to the header processor
var (
headers = make([]*types.Header, 0, maxHeadersProcess)
hashes = make([]common.Hash, 0, maxHeadersProcess)
)
for i := 0; i < maxHeadersProcess && from <= head.Number.Uint64(); i++ {
header := d.skeleton.Header(from)
// The header is not found in skeleton space, try to find it in local chain.
if header == nil && from < tail.Number.Uint64() {
dist := tail.Number.Uint64() - from
if len(localHeaders) >= int(dist) {
header = localHeaders[dist-1]
}
}
// The header is still missing, the beacon sync is corrupted and bail out
// the error here.
if header == nil {
return fmt.Errorf("missing beacon header %d", from)
}
headers = append(headers, header)
hashes = append(hashes, headers[i].Hash())
from++
}
if len(headers) > 0 {
log.Trace("Scheduling new beacon headers", "count", len(headers), "from", from-uint64(len(headers)))
select {
case d.headerProcCh <- &headerTask{
headers: headers,
hashes: hashes,
}:
case <-d.cancelCh:
return errCanceled
}
}
// If we still have headers to import, loop and keep pushing them
if from <= head.Number.Uint64() {
continue
}
// If the pivot block is committed, signal header sync termination
if d.committed.Load() {
select {
case d.headerProcCh <- nil:
return nil
case <-d.cancelCh:
return errCanceled
}
}
// State sync still going, wait a bit for new headers and retry
log.Trace("Pivot not yet committed, waiting...")
select {
case <-time.After(fsHeaderContCheck):
case <-d.cancelCh:
return errCanceled
}
}
}