2022-01-11 16:31:59 +00:00
|
|
|
package blockstore
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"sync"
|
2022-01-12 01:29:26 +00:00
|
|
|
"time"
|
2022-01-11 16:31:59 +00:00
|
|
|
|
|
|
|
block "github.com/ipfs/go-block-format"
|
|
|
|
"github.com/ipfs/go-cid"
|
2022-06-28 11:09:59 +00:00
|
|
|
ipld "github.com/ipfs/go-ipld-format"
|
2022-06-14 15:00:51 +00:00
|
|
|
"golang.org/x/xerrors"
|
2022-01-11 16:31:59 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// autolog is a logger for the autobatching blockstore. It is subscoped from the
|
|
|
|
// blockstore logger.
|
|
|
|
var autolog = log.Named("auto")
|
|
|
|
|
2022-01-12 17:57:34 +00:00
|
|
|
// contains the same set of blocks twice, once as an ordered list for flushing, and as a map for fast access
|
|
|
|
type blockBatch struct {
|
|
|
|
blockList []block.Block
|
|
|
|
blockMap map[cid.Cid]block.Block
|
|
|
|
}
|
|
|
|
|
2022-01-11 16:31:59 +00:00
|
|
|
type AutobatchBlockstore struct {
|
2022-01-12 01:29:26 +00:00
|
|
|
// TODO: drop if memory consumption is too high
|
|
|
|
addedCids map[cid.Cid]struct{}
|
|
|
|
|
2022-01-12 20:03:34 +00:00
|
|
|
stateLock sync.Mutex
|
2022-01-12 17:57:34 +00:00
|
|
|
bufferedBatch blockBatch
|
2022-01-12 01:29:26 +00:00
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
flushingBatch blockBatch
|
|
|
|
flushErr error
|
2022-01-12 17:57:34 +00:00
|
|
|
|
|
|
|
flushCh chan struct{}
|
2022-01-12 01:29:26 +00:00
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
doFlushLock sync.Mutex
|
2022-01-12 01:29:26 +00:00
|
|
|
flushRetryDelay time.Duration
|
2022-01-12 22:37:29 +00:00
|
|
|
doneCh chan struct{}
|
|
|
|
shutdown context.CancelFunc
|
2022-01-12 01:29:26 +00:00
|
|
|
|
|
|
|
backingBs Blockstore
|
|
|
|
|
2022-01-11 16:31:59 +00:00
|
|
|
bufferCapacity int
|
|
|
|
bufferSize int
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewAutobatch(ctx context.Context, backingBs Blockstore, bufferCapacity int) *AutobatchBlockstore {
|
2022-01-12 22:37:29 +00:00
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
2022-01-11 16:31:59 +00:00
|
|
|
bs := &AutobatchBlockstore{
|
2022-01-12 17:57:34 +00:00
|
|
|
addedCids: make(map[cid.Cid]struct{}),
|
|
|
|
backingBs: backingBs,
|
|
|
|
bufferCapacity: bufferCapacity,
|
|
|
|
flushCh: make(chan struct{}, 1),
|
2022-01-12 22:37:29 +00:00
|
|
|
doneCh: make(chan struct{}),
|
2022-01-12 01:29:26 +00:00
|
|
|
// could be made configable
|
2022-01-12 17:57:34 +00:00
|
|
|
flushRetryDelay: time.Millisecond * 100,
|
2022-01-12 22:37:29 +00:00
|
|
|
shutdown: cancel,
|
2022-01-11 16:31:59 +00:00
|
|
|
}
|
|
|
|
|
2022-01-12 17:57:34 +00:00
|
|
|
bs.bufferedBatch.blockMap = make(map[cid.Cid]block.Block)
|
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
go bs.flushWorker(ctx)
|
2022-01-12 00:44:56 +00:00
|
|
|
|
2022-01-11 16:31:59 +00:00
|
|
|
return bs
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) Put(ctx context.Context, blk block.Block) error {
|
2022-01-12 20:03:34 +00:00
|
|
|
bs.stateLock.Lock()
|
|
|
|
defer bs.stateLock.Unlock()
|
2022-01-12 17:57:34 +00:00
|
|
|
|
2022-01-11 22:17:34 +00:00
|
|
|
_, ok := bs.addedCids[blk.Cid()]
|
|
|
|
if !ok {
|
|
|
|
bs.addedCids[blk.Cid()] = struct{}{}
|
2022-01-12 17:57:34 +00:00
|
|
|
bs.bufferedBatch.blockList = append(bs.bufferedBatch.blockList, blk)
|
|
|
|
bs.bufferedBatch.blockMap[blk.Cid()] = blk
|
2022-01-11 22:17:34 +00:00
|
|
|
bs.bufferSize += len(blk.RawData())
|
|
|
|
if bs.bufferSize >= bs.bufferCapacity {
|
2022-01-12 00:44:56 +00:00
|
|
|
// signal that a flush is appropriate, may be ignored
|
|
|
|
select {
|
|
|
|
case bs.flushCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
// do nothing
|
|
|
|
}
|
2022-01-11 22:17:34 +00:00
|
|
|
}
|
2022-01-11 16:31:59 +00:00
|
|
|
}
|
2022-01-12 17:57:34 +00:00
|
|
|
|
2022-01-11 16:31:59 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
func (bs *AutobatchBlockstore) flushWorker(ctx context.Context) {
|
|
|
|
defer close(bs.doneCh)
|
2022-01-12 00:44:56 +00:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-bs.flushCh:
|
2022-01-12 22:37:29 +00:00
|
|
|
// TODO: check if we _should_ actually flush. We could get a spurious wakeup
|
|
|
|
// here.
|
|
|
|
putErr := bs.doFlush(ctx, false)
|
2022-01-12 01:29:26 +00:00
|
|
|
for putErr != nil {
|
|
|
|
select {
|
2022-01-12 22:37:29 +00:00
|
|
|
case <-ctx.Done():
|
2022-01-12 01:29:26 +00:00
|
|
|
return
|
2022-01-12 17:57:34 +00:00
|
|
|
case <-time.After(bs.flushRetryDelay):
|
|
|
|
autolog.Errorf("FLUSH ERRORED: %w, retrying after %v", putErr, bs.flushRetryDelay)
|
2022-01-12 22:37:29 +00:00
|
|
|
putErr = bs.doFlush(ctx, true)
|
2022-01-12 01:29:26 +00:00
|
|
|
}
|
2022-01-12 00:44:56 +00:00
|
|
|
}
|
2022-01-12 22:37:29 +00:00
|
|
|
case <-ctx.Done():
|
|
|
|
// Do one last flush.
|
|
|
|
_ = bs.doFlush(ctx, false)
|
2022-01-12 00:44:56 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-12 20:03:34 +00:00
|
|
|
// caller must NOT hold stateLock
|
2022-01-12 22:37:29 +00:00
|
|
|
// set retryOnly to true to only retry a failed flush and not flush anything new.
|
|
|
|
func (bs *AutobatchBlockstore) doFlush(ctx context.Context, retryOnly bool) error {
|
2022-01-12 20:03:34 +00:00
|
|
|
bs.doFlushLock.Lock()
|
|
|
|
defer bs.doFlushLock.Unlock()
|
2022-01-12 22:37:29 +00:00
|
|
|
|
|
|
|
// If we failed to flush last time, try flushing again.
|
|
|
|
if bs.flushErr != nil {
|
|
|
|
bs.flushErr = bs.backingBs.PutMany(ctx, bs.flushingBatch.blockList)
|
2022-01-12 17:57:34 +00:00
|
|
|
}
|
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
// If we failed, or we're _only_ retrying, bail.
|
|
|
|
if retryOnly || bs.flushErr != nil {
|
|
|
|
return bs.flushErr
|
|
|
|
}
|
|
|
|
|
|
|
|
// Then take the current batch...
|
2022-01-12 20:03:34 +00:00
|
|
|
bs.stateLock.Lock()
|
2022-01-12 22:37:29 +00:00
|
|
|
// We do NOT clear addedCids here, because its purpose is to expedite Puts
|
|
|
|
bs.flushingBatch = bs.bufferedBatch
|
|
|
|
bs.bufferedBatch.blockList = make([]block.Block, 0, len(bs.flushingBatch.blockList))
|
|
|
|
bs.bufferedBatch.blockMap = make(map[cid.Cid]block.Block, len(bs.flushingBatch.blockMap))
|
2022-01-12 20:03:34 +00:00
|
|
|
bs.stateLock.Unlock()
|
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
// And try to flush it.
|
|
|
|
bs.flushErr = bs.backingBs.PutMany(ctx, bs.flushingBatch.blockList)
|
|
|
|
|
|
|
|
// If we succeeded, reset the batch. Otherwise, we'll try again next time.
|
|
|
|
if bs.flushErr == nil {
|
|
|
|
bs.stateLock.Lock()
|
|
|
|
bs.flushingBatch = blockBatch{}
|
|
|
|
bs.stateLock.Unlock()
|
|
|
|
}
|
|
|
|
|
2022-01-12 17:57:34 +00:00
|
|
|
return bs.flushErr
|
|
|
|
}
|
|
|
|
|
2022-01-12 20:03:34 +00:00
|
|
|
// caller must NOT hold stateLock
|
2022-01-12 17:57:34 +00:00
|
|
|
func (bs *AutobatchBlockstore) Flush(ctx context.Context) error {
|
2022-01-12 22:37:29 +00:00
|
|
|
return bs.doFlush(ctx, false)
|
2022-01-12 00:44:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) Shutdown(ctx context.Context) error {
|
2022-01-12 22:37:29 +00:00
|
|
|
// TODO: Prevent puts after we call this to avoid losing data.
|
|
|
|
bs.shutdown()
|
|
|
|
select {
|
|
|
|
case <-bs.doneCh:
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
2022-01-12 20:03:34 +00:00
|
|
|
}
|
2022-01-12 00:44:56 +00:00
|
|
|
|
2022-01-12 22:37:29 +00:00
|
|
|
bs.doFlushLock.Lock()
|
|
|
|
defer bs.doFlushLock.Unlock()
|
|
|
|
|
2022-01-12 17:57:34 +00:00
|
|
|
return bs.flushErr
|
2022-01-11 22:44:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) Get(ctx context.Context, c cid.Cid) (block.Block, error) {
|
2022-01-12 01:29:26 +00:00
|
|
|
// may seem backward to check the backingBs first, but that is the likeliest case
|
2022-01-11 22:44:45 +00:00
|
|
|
blk, err := bs.backingBs.Get(ctx, c)
|
|
|
|
if err == nil {
|
|
|
|
return blk, nil
|
|
|
|
}
|
|
|
|
|
2022-06-28 11:09:59 +00:00
|
|
|
if !ipld.IsNotFound(err) {
|
2022-01-11 22:44:45 +00:00
|
|
|
return blk, err
|
|
|
|
}
|
|
|
|
|
2022-01-12 22:22:54 +00:00
|
|
|
bs.stateLock.Lock()
|
|
|
|
defer bs.stateLock.Unlock()
|
2022-01-12 17:57:34 +00:00
|
|
|
v, ok := bs.flushingBatch.blockMap[c]
|
2022-01-12 01:29:26 +00:00
|
|
|
if ok {
|
|
|
|
return v, nil
|
|
|
|
}
|
|
|
|
|
2022-01-12 20:03:34 +00:00
|
|
|
v, ok = bs.bufferedBatch.blockMap[c]
|
2022-01-12 01:29:26 +00:00
|
|
|
if ok {
|
|
|
|
return v, nil
|
|
|
|
}
|
|
|
|
|
2022-01-12 21:04:47 +00:00
|
|
|
return bs.Get(ctx, c)
|
2022-01-11 22:44:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) DeleteBlock(context.Context, cid.Cid) error {
|
2022-01-12 17:57:34 +00:00
|
|
|
// if we wanted to support this, we would have to:
|
|
|
|
// - flush
|
|
|
|
// - delete from the backingBs (if present)
|
|
|
|
// - remove from addedCids (if present)
|
|
|
|
// - if present in addedCids, also walk the ordered lists and remove if present
|
2022-01-11 22:44:45 +00:00
|
|
|
return xerrors.New("deletion is unsupported")
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) DeleteMany(ctx context.Context, cids []cid.Cid) error {
|
|
|
|
// see note in DeleteBlock()
|
|
|
|
return xerrors.New("deletion is unsupported")
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) {
|
|
|
|
_, err := bs.Get(ctx, c)
|
|
|
|
if err == nil {
|
|
|
|
return true, nil
|
|
|
|
}
|
2022-06-28 11:09:59 +00:00
|
|
|
if ipld.IsNotFound(err) {
|
2022-01-11 22:44:45 +00:00
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) {
|
|
|
|
blk, err := bs.Get(ctx, c)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return len(blk.RawData()), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) PutMany(ctx context.Context, blks []block.Block) error {
|
|
|
|
for _, blk := range blks {
|
|
|
|
if err := bs.Put(ctx, blk); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
|
2022-01-12 17:57:34 +00:00
|
|
|
if err := bs.Flush(ctx); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-01-12 01:29:26 +00:00
|
|
|
|
2022-01-12 17:57:34 +00:00
|
|
|
return bs.backingBs.AllKeysChan(ctx)
|
2022-01-11 22:44:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) HashOnRead(enabled bool) {
|
|
|
|
bs.backingBs.HashOnRead(enabled)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bs *AutobatchBlockstore) View(ctx context.Context, cid cid.Cid, callback func([]byte) error) error {
|
2022-01-12 20:03:34 +00:00
|
|
|
blk, err := bs.Get(ctx, cid)
|
|
|
|
if err != nil {
|
2022-01-12 17:57:34 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-01-12 20:03:34 +00:00
|
|
|
return callback(blk.RawData())
|
2022-01-11 16:31:59 +00:00
|
|
|
}
|