Support faster Get, retry flushes on error

Aayush Rajasekaran 2022-01-11 20:29:26 -05:00 committed by Jennifer Wang
parent ce6e328d52
commit dafd1f77fa
2 changed files with 75 additions and 44 deletions

View File

@@ -3,6 +3,7 @@ package blockstore
 import (
 	"context"
 	"sync"
+	"time"

 	"golang.org/x/xerrors"
@@ -15,24 +16,39 @@ import (
 var autolog = log.Named("auto")

 type AutobatchBlockstore struct {
-	bufferedBlks   []block.Block
+	// TODO: drop if memory consumption is too high
 	addedCids map[cid.Cid]struct{}
-	bufferedBlksLk sync.Mutex
-	flushCh        chan struct{}
-	flushErr       error
-	shutdownCh     chan struct{}
-	flushCtx       context.Context
-	backingBs      Blockstore
+
+	bufferedLk          sync.Mutex
+	bufferedBlksOrdered []block.Block
+	bufferedBlksMap     map[cid.Cid]block.Block
+
+	flushingLk      sync.Mutex
+	flushingBlksMap map[cid.Cid]block.Block
+
+	flushCh         chan struct{}
+	flushErr        error
+	flushRetryDelay time.Duration
+	flushCtx        context.Context
+	shutdownCh      chan struct{}
+
+	backingBs Blockstore
+
 	bufferCapacity int
 	bufferSize     int
 }

 func NewAutobatch(ctx context.Context, backingBs Blockstore, bufferCapacity int) *AutobatchBlockstore {
 	bs := &AutobatchBlockstore{
-		backingBs:      backingBs,
-		bufferCapacity: bufferCapacity,
-		addedCids:      make(map[cid.Cid]struct{}),
-		flushCtx:       ctx,
+		addedCids:       make(map[cid.Cid]struct{}),
+		backingBs:       backingBs,
+		bufferCapacity:  bufferCapacity,
+		bufferedBlksMap: make(map[cid.Cid]block.Block),
+		flushingBlksMap: make(map[cid.Cid]block.Block),
+		flushCtx:        ctx,
+		flushCh:         make(chan struct{}, 1),
+		// could be made configable
+		flushRetryDelay: time.Second * 5,
 	}

 	go bs.flushWorker()
@@ -41,11 +57,12 @@ func NewAutobatch(ctx context.Context, backingBs Blockstore, bufferCapacity int)
 }

 func (bs *AutobatchBlockstore) Put(ctx context.Context, blk block.Block) error {
-	bs.bufferedBlksLk.Lock()
+	bs.bufferedLk.Lock()
 	_, ok := bs.addedCids[blk.Cid()]
 	if !ok {
-		bs.bufferedBlks = append(bs.bufferedBlks, blk)
 		bs.addedCids[blk.Cid()] = struct{}{}
+		bs.bufferedBlksOrdered = append(bs.bufferedBlksOrdered, blk)
+		bs.bufferedBlksMap[blk.Cid()] = blk
 		bs.bufferSize += len(blk.RawData())
 		if bs.bufferSize >= bs.bufferCapacity {
 			// signal that a flush is appropriate, may be ignored
@@ -56,7 +73,7 @@ func (bs *AutobatchBlockstore) Put(ctx context.Context, blk block.Block) error {
 			}
 		}
 	}
-	bs.bufferedBlksLk.Unlock()
+	bs.bufferedLk.Unlock()
 	return nil
 }
@@ -65,9 +82,16 @@ func (bs *AutobatchBlockstore) flushWorker() {
 		select {
 		case <-bs.flushCh:
 			putErr := bs.doFlush(bs.flushCtx)
-			if putErr != nil {
-				autolog.Errorf("FLUSH ERRORED: %w", putErr)
-				bs.flushErr = xerrors.Errorf("%w, put error: %w", bs.flushErr, putErr)
+			for putErr != nil {
+				select {
+				case <-bs.shutdownCh:
+					bs.flushErr = putErr
+					return
+				default:
+					autolog.Errorf("FLUSH ERRORED: %w, retrying in %v", putErr, bs.flushRetryDelay)
+					time.Sleep(bs.flushRetryDelay)
+					putErr = bs.doFlush(bs.flushCtx)
+				}
 			}
 		case <-bs.shutdownCh:
 			return
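The hunk above replaces the worker's record-and-continue error handling with a loop that retries the flush until it succeeds, while still yielding to a pending shutdown. A standalone sketch of that control flow follows; doFlush, the delay value, and the channel here are hypothetical stand-ins, not the lotus code itself.

// Minimal, self-contained sketch of the retry-until-shutdown pattern used by the
// new flushWorker; doFlush, flushRetryDelay and shutdownCh are stand-ins.
package main

import (
	"errors"
	"fmt"
	"time"
)

func main() {
	shutdownCh := make(chan struct{}, 1)
	flushRetryDelay := 10 * time.Millisecond

	attempts := 0
	doFlush := func() error {
		attempts++
		if attempts < 3 {
			return errors.New("transient backing-store failure")
		}
		return nil
	}

	putErr := doFlush()
	for putErr != nil {
		select {
		case <-shutdownCh:
			// record the error and stop retrying (mirrors bs.flushErr = putErr; return)
			fmt.Println("shutdown requested, giving up with:", putErr)
			return
		default:
			fmt.Println("flush errored, retrying in", flushRetryDelay, ":", putErr)
			time.Sleep(flushRetryDelay)
			putErr = doFlush()
		}
	}
	fmt.Println("flush succeeded after", attempts, "attempts")
}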
@@ -76,20 +100,24 @@ func (bs *AutobatchBlockstore) flushWorker() {
 }

 func (bs *AutobatchBlockstore) doFlush(ctx context.Context) error {
-	bs.bufferedBlksLk.Lock()
+	bs.bufferedLk.Lock()
+	bs.flushingLk.Lock()
 	// We do NOT clear addedCids here, because its purpose is to expedite Puts
-	toFlush := bs.bufferedBlks
-	bs.bufferedBlks = []block.Block{}
-	bs.bufferedBlksLk.Unlock()
-	return bs.backingBs.PutMany(ctx, toFlush)
-}
-
-func (bs *AutobatchBlockstore) Flush(ctx context.Context) error {
-	return bs.doFlush(ctx)
+	flushingBlksOrdered := bs.bufferedBlksOrdered
+	bs.flushingBlksMap = bs.bufferedBlksMap
+	bs.bufferedBlksOrdered = []block.Block{}
+	bs.bufferedBlksMap = make(map[cid.Cid]block.Block)
+	bs.bufferedLk.Unlock()
+	bs.flushingLk.Unlock()
+	return bs.backingBs.PutMany(ctx, flushingBlksOrdered)
 }

 func (bs *AutobatchBlockstore) Shutdown(ctx context.Context) error {
+	// request one last flush of the worker
+	bs.flushCh <- struct{}{}
+	// shutdown the flush worker
 	bs.shutdownCh <- struct{}{}
+	// if it ever errored, this method fails
 	if bs.flushErr != nil {
 		return xerrors.Errorf("flushWorker errored: %w", bs.flushErr)
 	}
@@ -98,8 +126,8 @@ func (bs *AutobatchBlockstore) Shutdown(ctx context.Context) error {
 	return bs.doFlush(ctx)
 }

-// May be very slow if the cid queried wasn't in the backingBs at the time of creation of this AutobatchBlockstore
 func (bs *AutobatchBlockstore) Get(ctx context.Context, c cid.Cid) (block.Block, error) {
+	// may seem backward to check the backingBs first, but that is the likeliest case
 	blk, err := bs.backingBs.Get(ctx, c)
 	if err == nil {
 		return blk, nil
@@ -109,16 +137,24 @@ func (bs *AutobatchBlockstore) Get(ctx context.Context, c cid.Cid) (block.Block,
 		return blk, err
 	}

-	bs.Flush(ctx)
-	return bs.backingBs.Get(ctx, c)
+	bs.flushingLk.Lock()
+	v, ok := bs.flushingBlksMap[c]
+	bs.flushingLk.Unlock()
+	if ok {
+		return v, nil
+	}
+
+	bs.bufferedLk.Lock()
+	v, ok = bs.bufferedBlksMap[c]
+	bs.bufferedLk.Unlock()
+	if ok {
+		return v, nil
+	}
+
+	return nil, ErrNotFound
 }

 func (bs *AutobatchBlockstore) DeleteBlock(context.Context, cid.Cid) error {
-	// if we wanted to support this, we would have to:
-	// - flush
-	// - delete from the backingBs (if present)
-	// - remove from addedCids (if present)
-	// - if present in addedCids, also walk bufferedBlks and remove if present
 	return xerrors.New("deletion is unsupported")
 }
@@ -159,8 +195,8 @@ func (bs *AutobatchBlockstore) PutMany(ctx context.Context, blks []block.Block)
 }

 func (bs *AutobatchBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
-	bs.Flush(ctx)
-	return bs.backingBs.AllKeysChan(ctx)
+	return nil, xerrors.New("unsupported")
 }

 func (bs *AutobatchBlockstore) HashOnRead(enabled bool) {
@@ -168,6 +204,5 @@ func (bs *AutobatchBlockstore) HashOnRead(enabled bool) {
 }

 func (bs *AutobatchBlockstore) View(ctx context.Context, cid cid.Cid, callback func([]byte) error) error {
-	bs.Flush(ctx)
-	return bs.backingBs.View(ctx, cid, callback)
+	return xerrors.New("unsupported")
 }
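For reference, a minimal usage sketch of the blockstore after this change (not part of the commit): it assumes the package lives at github.com/filecoin-project/lotus/blockstore and that NewMemory() is available there as an in-memory backing Blockstore; any other backing store should behave the same way.

// Sketch only: exercises the new fast Get path and the flush-on-Shutdown behaviour.
// blockstore.NewMemory() is an assumed in-memory backing store.
package main

import (
	"context"
	"fmt"

	block "github.com/ipfs/go-block-format"

	"github.com/filecoin-project/lotus/blockstore"
)

func main() {
	ctx := context.Background()

	// buffer up to 1 MiB of blocks before signalling the flush worker
	bs := blockstore.NewAutobatch(ctx, blockstore.NewMemory(), 1<<20)

	blk := block.NewBlock([]byte("hello"))
	if err := bs.Put(ctx, blk); err != nil {
		panic(err)
	}

	// Previously this Get forced a flush and re-read the backing store;
	// now it is served from the buffered/flushing maps if not yet flushed.
	got, err := bs.Get(ctx, blk.Cid())
	if err != nil {
		panic(err)
	}
	fmt.Println(got.Cid())

	// Shutdown stops the worker and performs one final doFlush, so callers
	// no longer need the removed Flush method.
	if err := bs.Shutdown(ctx); err != nil {
		panic(err)
	}
}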

View File

@@ -1297,11 +1297,7 @@ func upgradeActorsV7Common(
 		return cid.Undef, xerrors.Errorf("failed to persist new state root: %w", err)
 	}

-	// Persist the new tree. Blocks until the entire writeStore is in the state blockstore.
-	if err := writeStore.Flush(ctx); err != nil {
-		return cid.Undef, xerrors.Errorf("failed to flush writestore: %w", err)
-	}
-
+	// Persists the new tree and shuts down the flush worker
 	if err := writeStore.Shutdown(ctx); err != nil {
 		return cid.Undef, xerrors.Errorf("writeStore failed: %w", err)
 	}