parallel chain sync

This commit is contained in:
vyzo 2020-09-16 21:04:44 +03:00
parent 61c0b8c3db
commit 35f6e10646

View File

@ -7,7 +7,6 @@ import (
"fmt"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
@ -63,20 +62,12 @@ var (
// where the Syncer publishes candidate chain heads to be synced.
LocalIncoming = "incoming"
log = logging.Logger("chain")
defaultMessageFetchWindowSize = 200
)
log = logging.Logger("chain")
func init() {
if s := os.Getenv("LOTUS_BSYNC_MSG_WINDOW"); s != "" {
val, err := strconv.Atoi(s)
if err != nil {
log.Errorf("failed to parse LOTUS_BSYNC_MSG_WINDOW: %s", err)
return
}
defaultMessageFetchWindowSize = val
}
}
concurrentSyncRequests = 16
syncRequestBatchSize = 4
syncRequestRetries = 5
)
// Syncer is in charge of running the chain synchronization logic. As such, it
// is tasked with these functions, amongst others:
@ -132,8 +123,6 @@ type Syncer struct {
verifier ffiwrapper.Verifier
windowSize int
tickerCtxCancel context.CancelFunc
checkptLk sync.Mutex
@ -175,7 +164,6 @@ func NewSyncer(ds dtypes.MetadataDS, sm *stmgr.StateManager, exchange exchange.C
receiptTracker: newBlockReceiptTracker(),
connmgr: connmgr,
verifier: verifier,
windowSize: defaultMessageFetchWindowSize,
incoming: pubsub.New(50),
}
@ -1483,8 +1471,6 @@ func (syncer *Syncer) iterFullTipsets(ctx context.Context, headers []*types.TipS
span.AddAttributes(trace.Int64Attribute("num_headers", int64(len(headers))))
windowSize := syncer.windowSize
mainLoop:
for i := len(headers) - 1; i >= 0; {
fts, err := syncer.store.TryFillTipSet(headers[i])
if err != nil {
@ -1498,35 +1484,73 @@ mainLoop:
continue
}
batchSize := windowSize
batchSize := concurrentSyncRequests * syncRequestBatchSize
if i < batchSize {
batchSize = i
if i == 0 {
batchSize = 1
} else {
batchSize = i
}
}
nextI := (i + 1) - batchSize // want to fetch batchSize values, 'i' points to last one we want to fetch, so its 'inclusive' of our request, thus we need to add one to our request start index
ss.SetStage(api.StageFetchingMessages)
var bstout []*exchange.CompactedMessages
for len(bstout) < batchSize {
next := headers[nextI]
bstout := make([]*exchange.CompactedMessages, batchSize)
var wg sync.WaitGroup
var mx sync.Mutex
var batchErr error
for j := 0; j < batchSize; j += syncRequestBatchSize {
wg.Add(1)
go func(j int) {
defer wg.Done()
nreq := batchSize - len(bstout)
bstips, err := syncer.Exchange.GetChainMessages(ctx, next, uint64(nreq))
if err != nil {
// TODO check errors for temporary nature
if windowSize > 1 {
windowSize /= 2
log.Infof("error fetching messages: %s; reducing window size to %d and trying again", err, windowSize)
continue mainLoop
nreq := syncRequestBatchSize
if j*syncRequestBatchSize+nreq > batchSize {
nreq = batchSize - j*syncRequestBatchSize
}
return xerrors.Errorf("message processing failed: %w", err)
}
bstout = append(bstout, bstips...)
nextI += len(bstips)
failed := false
for offset := 0; !failed && offset < nreq; {
nextI := (i + 1) - batchSize + j*syncRequestBatchSize + offset
nextHeader := headers[nextI]
var requestErr error
var requestResult []*exchange.CompactedMessages
for retry := 0; requestResult == nil && retry < syncRequestRetries; retry++ {
if retry > 0 {
log.Infof("fetching messages at %d (retry %d)", nextI, retry)
} else {
log.Infof("fetching messages at %d", nextI)
}
result, err := syncer.Exchange.GetChainMessages(ctx, nextHeader, uint64(nreq-offset))
if err != nil {
requestErr = multierror.Append(requestErr, err)
} else {
requestResult = result
}
}
mx.Lock()
if requestResult == nil {
// we failed!
log.Errorf("error fetching messages at %d: %s", nextI, requestErr)
batchErr = multierror.Append(batchErr, requestErr)
failed = true
} else {
copy(bstout[j*syncRequestBatchSize+offset:], requestResult)
offset += len(requestResult)
}
mx.Unlock()
}
}(j)
}
wg.Wait()
ss.SetStage(api.StageMessages)
if batchErr != nil {
return xerrors.Errorf("failed to fetch messages: %w", err)
}
for bsi := 0; bsi < len(bstout); bsi++ {
// temp storage so we don't persist data we dont want to
bs := bstore.NewTemporary()
@ -1555,23 +1579,9 @@ mainLoop:
}
}
if i >= windowSize {
newWindowSize := windowSize + 10
if newWindowSize > int(exchange.MaxRequestLength) {
newWindowSize = int(exchange.MaxRequestLength)
}
if newWindowSize > windowSize {
windowSize = newWindowSize
log.Infof("successfully fetched %d messages; increasing window size to %d", len(bstout), windowSize)
}
}
i -= batchSize
}
// remember our window size
syncer.windowSize = windowSize
return nil
}