296 lines
6.4 KiB
Go
296 lines
6.4 KiB
Go
|
package format
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"errors"
|
||
|
"runtime"
|
||
|
|
||
|
cid "github.com/ipfs/go-cid"
|
||
|
)
|
||
|
|
||
|
// ParallelBatchCommits caps how many batch commits may be in flight at once;
// starting another commit beyond this limit blocks until one finishes.
// It defaults to twice the number of CPUs.
// TODO(ipfs/go-ipfs#4299): Experiment with multiple datastores, storage
// devices, and CPUs to find the right value/formula.
var ParallelBatchCommits = 2 * runtime.NumCPU()
|
||
|
|
||
|
// Sentinel errors describing the state of a batch.
var (
	// ErrNotCommited reports that a batch was closed without having been
	// successfully committed.
	// NOTE(review): nothing visible in this file returns it; presumably it
	// is kept for callers elsewhere — confirm before removing.
	ErrNotCommited = errors.New("error: batch not commited")

	// ErrClosed reports an operation on a batch that has already been
	// closed.
	// NOTE(review): nothing visible in this file returns it either.
	ErrClosed = errors.New("error: batch closed")
)
|
||
|
|
||
|
// NewBatch returns a node buffer (Batch) that buffers nodes internally and
|
||
|
// commits them to the underlying DAGService in batches. Use this if you intend
|
||
|
// to add or remove a lot of nodes all at once.
|
||
|
//
|
||
|
// If the passed context is canceled, any in-progress commits are aborted.
|
||
|
//
|
||
|
func NewBatch(ctx context.Context, na NodeAdder, opts ...BatchOption) *Batch {
|
||
|
ctx, cancel := context.WithCancel(ctx)
|
||
|
bopts := defaultBatchOptions
|
||
|
for _, o := range opts {
|
||
|
o(&bopts)
|
||
|
}
|
||
|
return &Batch{
|
||
|
na: na,
|
||
|
ctx: ctx,
|
||
|
cancel: cancel,
|
||
|
commitResults: make(chan error, ParallelBatchCommits),
|
||
|
opts: bopts,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Batch is a buffer for batching adds to a dag.
|
||
|
type Batch struct {
|
||
|
na NodeAdder
|
||
|
|
||
|
ctx context.Context
|
||
|
cancel func()
|
||
|
|
||
|
activeCommits int
|
||
|
err error
|
||
|
commitResults chan error
|
||
|
|
||
|
nodes []Node
|
||
|
size int
|
||
|
|
||
|
opts batchOptions
|
||
|
}
|
||
|
|
||
|
func (t *Batch) processResults() {
|
||
|
for t.activeCommits > 0 {
|
||
|
select {
|
||
|
case err := <-t.commitResults:
|
||
|
t.activeCommits--
|
||
|
if err != nil {
|
||
|
t.setError(err)
|
||
|
return
|
||
|
}
|
||
|
default:
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (t *Batch) asyncCommit() {
|
||
|
numBlocks := len(t.nodes)
|
||
|
if numBlocks == 0 {
|
||
|
return
|
||
|
}
|
||
|
if t.activeCommits >= ParallelBatchCommits {
|
||
|
select {
|
||
|
case err := <-t.commitResults:
|
||
|
t.activeCommits--
|
||
|
|
||
|
if err != nil {
|
||
|
t.setError(err)
|
||
|
return
|
||
|
}
|
||
|
case <-t.ctx.Done():
|
||
|
t.setError(t.ctx.Err())
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
go func(ctx context.Context, b []Node, result chan error, na NodeAdder) {
|
||
|
select {
|
||
|
case result <- na.AddMany(ctx, b):
|
||
|
case <-ctx.Done():
|
||
|
}
|
||
|
}(t.ctx, t.nodes, t.commitResults, t.na)
|
||
|
|
||
|
t.activeCommits++
|
||
|
t.nodes = make([]Node, 0, numBlocks)
|
||
|
t.size = 0
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Add adds a node to the batch and commits the batch if necessary.
|
||
|
func (t *Batch) Add(ctx context.Context, nd Node) error {
|
||
|
return t.AddMany(ctx, []Node{nd})
|
||
|
}
|
||
|
|
||
|
// AddMany many calls Add for every given Node, thus batching and
|
||
|
// commiting them as needed.
|
||
|
func (t *Batch) AddMany(ctx context.Context, nodes []Node) error {
|
||
|
if t.err != nil {
|
||
|
return t.err
|
||
|
}
|
||
|
// Not strictly necessary but allows us to catch errors early.
|
||
|
t.processResults()
|
||
|
|
||
|
if t.err != nil {
|
||
|
return t.err
|
||
|
}
|
||
|
|
||
|
t.nodes = append(t.nodes, nodes...)
|
||
|
for _, nd := range nodes {
|
||
|
t.size += len(nd.RawData())
|
||
|
}
|
||
|
|
||
|
if t.size > t.opts.maxSize || len(t.nodes) > t.opts.maxNodes {
|
||
|
t.asyncCommit()
|
||
|
}
|
||
|
return t.err
|
||
|
}
|
||
|
|
||
|
// Commit commits batched nodes.
|
||
|
func (t *Batch) Commit() error {
|
||
|
if t.err != nil {
|
||
|
return t.err
|
||
|
}
|
||
|
|
||
|
t.asyncCommit()
|
||
|
|
||
|
loop:
|
||
|
for t.activeCommits > 0 {
|
||
|
select {
|
||
|
case err := <-t.commitResults:
|
||
|
t.activeCommits--
|
||
|
if err != nil {
|
||
|
t.setError(err)
|
||
|
break loop
|
||
|
}
|
||
|
case <-t.ctx.Done():
|
||
|
t.setError(t.ctx.Err())
|
||
|
break loop
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return t.err
|
||
|
}
|
||
|
|
||
|
func (t *Batch) setError(err error) {
|
||
|
t.err = err
|
||
|
|
||
|
t.cancel()
|
||
|
|
||
|
// Drain as much as we can without blocking.
|
||
|
loop:
|
||
|
for {
|
||
|
select {
|
||
|
case <-t.commitResults:
|
||
|
default:
|
||
|
break loop
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Be nice and cleanup. These can take a *lot* of memory.
|
||
|
t.commitResults = nil
|
||
|
t.na = nil
|
||
|
t.ctx = nil
|
||
|
t.nodes = nil
|
||
|
t.size = 0
|
||
|
t.activeCommits = 0
|
||
|
}
|
||
|
|
||
|
// BatchOption provides a way of setting internal options of
|
||
|
// a Batch.
|
||
|
//
|
||
|
// See this post about the "functional options" pattern:
|
||
|
// http://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis
|
||
|
type BatchOption func(o *batchOptions)
|
||
|
|
||
|
// batchOptions holds the limits that decide when a Batch commits: maxSize is
// compared against the total raw-data size of the buffered nodes, maxNodes
// against their count.
type batchOptions struct {
	maxSize, maxNodes int
}
|
||
|
|
||
|
var defaultBatchOptions = batchOptions{
|
||
|
maxSize: 8 << 20,
|
||
|
|
||
|
// By default, only batch up to 128 nodes at a time.
|
||
|
// The current implementation of flatfs opens this many file
|
||
|
// descriptors at the same time for the optimized batch write.
|
||
|
maxNodes: 128,
|
||
|
}
|
||
|
|
||
|
// MaxSizeBatchOption sets the maximum size of a Batch.
|
||
|
func MaxSizeBatchOption(size int) BatchOption {
|
||
|
return func(o *batchOptions) {
|
||
|
o.maxSize = size
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// MaxNodesBatchOption sets the maximum number of nodes in a Batch.
|
||
|
func MaxNodesBatchOption(num int) BatchOption {
|
||
|
return func(o *batchOptions) {
|
||
|
o.maxNodes = num
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// BufferedDAG implements DAGService using a Batch NodeAdder to wrap add
// operations in the given DAGService. It will trigger Commit() before any
// non-Add operations, but otherwise calling Commit() is left to the user.
type BufferedDAG struct {
	// ds is the wrapped DAGService; Get, GetMany, Remove and RemoveMany are
	// forwarded to it once the batch has been committed.
	ds DAGService
	// b is the Batch that Add and AddMany write into.
	b *Batch
}
|
||
|
|
||
|
// NewBufferedDAG creates a BufferedDAG using the given DAGService and the
|
||
|
// given options for the Batch NodeAdder.
|
||
|
func NewBufferedDAG(ctx context.Context, ds DAGService, opts ...BatchOption) *BufferedDAG {
|
||
|
return &BufferedDAG{
|
||
|
ds: ds,
|
||
|
b: NewBatch(ctx, ds, opts...),
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Commit calls commit on the Batch.
|
||
|
func (bd *BufferedDAG) Commit() error {
|
||
|
return bd.b.Commit()
|
||
|
}
|
||
|
|
||
|
// Add adds a new node using Batch.
|
||
|
func (bd *BufferedDAG) Add(ctx context.Context, n Node) error {
|
||
|
return bd.b.Add(ctx, n)
|
||
|
}
|
||
|
|
||
|
// AddMany adds many nodes using Batch.
|
||
|
func (bd *BufferedDAG) AddMany(ctx context.Context, nds []Node) error {
|
||
|
return bd.b.AddMany(ctx, nds)
|
||
|
}
|
||
|
|
||
|
// Get commits and gets a node from the DAGService.
|
||
|
func (bd *BufferedDAG) Get(ctx context.Context, c cid.Cid) (Node, error) {
|
||
|
err := bd.b.Commit()
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return bd.ds.Get(ctx, c)
|
||
|
}
|
||
|
|
||
|
// GetMany commits and gets nodes from the DAGService.
|
||
|
func (bd *BufferedDAG) GetMany(ctx context.Context, cs []cid.Cid) <-chan *NodeOption {
|
||
|
err := bd.b.Commit()
|
||
|
if err != nil {
|
||
|
ch := make(chan *NodeOption, 1)
|
||
|
defer close(ch)
|
||
|
ch <- &NodeOption{
|
||
|
Node: nil,
|
||
|
Err: err,
|
||
|
}
|
||
|
return ch
|
||
|
}
|
||
|
return bd.ds.GetMany(ctx, cs)
|
||
|
}
|
||
|
|
||
|
// Remove commits and removes a node from the DAGService.
|
||
|
func (bd *BufferedDAG) Remove(ctx context.Context, c cid.Cid) error {
|
||
|
err := bd.b.Commit()
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
return bd.ds.Remove(ctx, c)
|
||
|
}
|
||
|
|
||
|
// RemoveMany commits and removes nodes from the DAGService.
|
||
|
func (bd *BufferedDAG) RemoveMany(ctx context.Context, cs []cid.Cid) error {
|
||
|
err := bd.b.Commit()
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
return bd.ds.RemoveMany(ctx, cs)
|
||
|
}
|